diff --git a/docs/content/rules/sort-regexp.mdx b/docs/content/rules/sort-regexp.mdx new file mode 100644 index 000000000..c2b10717e --- /dev/null +++ b/docs/content/rules/sort-regexp.mdx @@ -0,0 +1,272 @@ +--- +title: sort-regexp +description: Keep regular expressions predictable by sorting flags, character classes, and alternation branches. This ESLint rule helps you avoid subtle regex bugs and improves readability +shortDescription: Enforce sorted regular expressions +keywords: + - eslint + - regex sorting + - regular expressions + - eslint rule + - code quality + - javascript linting +--- + +import CodeExample from '../../components/CodeExample.svelte' +import CodeTabs from '../../components/CodeTabs.svelte' +import dedent from 'dedent' + +Enforce consistent ordering in regular expressions: flags, character-class elements, and alternation branches inside capture groups or the top-level pattern. + +Large patterns become easier to maintain when related branches are grouped and repeatedly used flags or character classes follow the same order. This rule helps you spot missing alternatives, avoid duplicated work, and keeps refactors safer by applying one sorting strategy across your codebase. + +The rule is **safe** – it only reorders elements without changing their meaning. 
+ +## Try it out + +<CodeExample + alphabetical={dedent` + const patterns = { + alternatives: /(apple|banana|orange|pear)/, + alias: /(?<role>administrator|guest|user)/, + characterClass: /[0-9A-Za-fz]/, + flags: /pattern/gimsuy, + } + `} + lineLength={dedent` + const patterns = { + alternatives: /(banana|orange|apple|pear)/, + alias: /(?<role>administrator|guest|user)/, + characterClass: /[0-9A-Za-fz]/, + flags: /pattern/yusmig, + } + `} + initial={dedent` + const patterns = { + alternatives: /(pear|apple|orange|banana)/, + alias: /(?<role>user|administrator|guest)/, + characterClass: /[z0-9a-fA-Z]/, + flags: /pattern/yusmig, + } + `} + client:load + lang="tsx" +/> + +## Options + +This rule accepts an options object with the following properties: + +### type + +default: `'alphabetical'` + +Specifies the sorting strategy applied to matches (flags, character-class elements, and alternation branches). + +- `'alphabetical'` — Sort values alphabetically using [`localeCompare`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/localeCompare). +- `'natural'` — Sort values in [natural](https://github.com/yobacca/natural-orderby) order (e.g., `(?=2)` comes before `(?=10)`). +- `'line-length'` — Sort values by their textual length. +- `'custom'` — Sort values according to a custom alphabet defined in [`alphabet`](#alphabet). +- `'unsorted'` — Do not reorder values. [`groups`](#groups) and [`customGroups`](#customgroups) are still applied. + +### order + +default: `'asc'` + +Specifies the direction of sorting. + +- `'asc'` — Ascending order (shorter to longer, A to Z). +- `'desc'` — Descending order (longer to shorter, Z to A). + +### fallbackSort + + + type: + ```ts + { + type: 'alphabetical' | 'natural' | 'line-length' | 'custom' | 'unsorted' + order?: 'asc' | 'desc' + } + ``` + +default: `{ type: 'unsorted' }` + +Used when the primary [`type`](#type) considers two values equal. For example, rely on alphabetical order when two branches share the same length. + +### alphabet + +default: `''` + +Defines the custom alphabet for `'custom'` sorting. 
Use the [`Alphabet` helper](https://perfectionist.dev/alphabet) to build consistent alphabets quickly. + +### locales + +default: `'en-US'` + +Locales passed to `localeCompare` when alphabetical comparisons are required. + +### ignoreCase + +default: `true` + +Whether comparisons should be case-insensitive. + +- `true` — Treat upper- and lowercase branches as equal (default). +- `false` — Preserve case sensitivity. + +### specialCharacters + +default: `'keep'` + +Controls how special characters are handled before comparison. + +- `'keep'` — Keep them (default). +- `'trim'` — Trim leading special characters. +- `'remove'` — Remove all special characters. + +### ignoreAlias + +default: `false` + +Determines whether named capturing group aliases (e.g., `(?<alias>...)`) are considered during comparison. + +- `false` — Sort by `alias: pattern` (default). +- `true` — Ignore the alias and only compare the branch content. + +This is useful when you only care about the pattern and not how it is aliased. + +### customGroups + + + type: `Array<CustomGroupDefinition | CustomGroupAnyOfDefinition>` + +default: `[]` + +Define custom groups to control how alternatives are organized before sorting. 
+ +```ts +interface CustomGroupDefinition { + groupName: string + selector?: 'alias' | 'pattern' + elementNamePattern?: string | string[] | { pattern: string; flags?: string } | { pattern: string; flags?: string }[] + elementValuePattern?: string | string[] | { pattern: string; flags?: string } | { pattern: string; flags?: string }[] + type?: 'alphabetical' | 'natural' | 'line-length' | 'custom' | 'unsorted' + order?: 'asc' | 'desc' + fallbackSort?: { type: string; order?: 'asc' | 'desc' } +} + +interface CustomGroupAnyOfDefinition { + groupName: string + anyOf: Array<{ + selector?: 'alias' | 'pattern' + elementNamePattern?: string | string[] | { pattern: string; flags?: string } | { pattern: string; flags?: string }[] + elementValuePattern?: string | string[] | { pattern: string; flags?: string } | { pattern: string; flags?: string }[] + }> + type?: 'alphabetical' | 'natural' | 'line-length' | 'custom' | 'unsorted' + order?: 'asc' | 'desc' + fallbackSort?: { type: string; order?: 'asc' | 'desc' } +} +``` + +Attributes: + +- `groupName` — Identifier referenced in [`groups`](#groups). +- `selector` — Filter by element type: + - `'alias'` — Named capturing group alternatives. + - `'pattern'` — Plain alternatives without aliases. +- `elementNamePattern` — Regex applied to the alias (if present). +- `elementValuePattern` — Regex applied to the branch content. +- `type`, `order`, `fallbackSort` — Override the respective global options for the group. + +Custom groups are evaluated in order; the first match wins and overrides predefined groups. + +### groups + + + type: `Array` + +default: `[]` + +Controls the order in which groups are evaluated. Use the selectors from [`customGroups`](#customgroups) or custom group names. When an element does not match any provided group, it falls back to the implicit `unknown` group at the end of the list. 
+ +You can combine multiple groups by wrapping them in an array; all members will be sorted together using the global or overridden options. + +## Usage + + + +## Version + +This rule was introduced in [v5.0.0](https://github.com/azat-io/eslint-plugin-perfectionist/releases/tag/v5.0.0). + +## Resources + +- [Rule source](https://github.com/azat-io/eslint-plugin-perfectionist/blob/main/rules/sort-regexp.ts) +- [Test source](https://github.com/azat-io/eslint-plugin-perfectionist/blob/main/test/rules/sort-regexp.test.ts) diff --git a/docs/public/llms.txt b/docs/public/llms.txt index d3fc25011..7a893924f 100644 --- a/docs/public/llms.txt +++ b/docs/public/llms.txt @@ -44,6 +44,7 @@ Key features: - [sort-named-imports](https://perfectionist.dev/rules/sort-named-imports): Sort named imports - [sort-object-types](https://perfectionist.dev/rules/sort-object-types): Sort TypeScript object type properties - [sort-objects](https://perfectionist.dev/rules/sort-objects): Sort object properties +- [sort-regexp](https://perfectionist.dev/rules/sort-regexp): Sort regular expressions - [sort-sets](https://perfectionist.dev/rules/sort-sets): Sort Set elements - [sort-switch-case](https://perfectionist.dev/rules/sort-switch-case): Sort switch statement cases - [sort-union-types](https://perfectionist.dev/rules/sort-union-types): Sort TypeScript union types diff --git a/index.ts b/index.ts index 28ee0a54b..db022b36e 100644 --- a/index.ts +++ b/index.ts @@ -20,6 +20,7 @@ import sortImports from './rules/sort-imports' import sortExports from './rules/sort-exports' import sortObjects from './rules/sort-objects' import sortModules from './rules/sort-modules' +import sortRegexp from './rules/sort-regexp' import sortEnums from './rules/sort-enums' import sortMaps from './rules/sort-maps' import sortSets from './rules/sort-sets' @@ -45,6 +46,7 @@ interface PluginConfig { 'sort-imports': Rule.RuleModule 'sort-exports': Rule.RuleModule 'sort-objects': Rule.RuleModule + 'sort-regexp': 
Rule.RuleModule 'sort-enums': Rule.RuleModule 'sort-sets': Rule.RuleModule 'sort-maps': Rule.RuleModule @@ -92,6 +94,7 @@ export let rules = { 'sort-imports': sortImports, 'sort-exports': sortExports, 'sort-objects': sortObjects, + 'sort-regexp': sortRegexp, 'sort-enums': sortEnums, 'sort-sets': sortSets, 'sort-maps': sortMaps, diff --git a/package.json b/package.json index a00ce46b6..9ad164f36 100644 --- a/package.json +++ b/package.json @@ -57,6 +57,7 @@ "test:unit": "vitest --run --coverage" }, "dependencies": { + "@eslint-community/regexpp": "^4.12.1", "@typescript-eslint/utils": "^8.46.2", "natural-orderby": "^5.0.0" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2e32ec2bc..e015d53cd 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -10,6 +10,9 @@ importers: .: dependencies: + '@eslint-community/regexpp': + specifier: ^4.12.1 + version: 4.12.2 '@typescript-eslint/utils': specifier: ^8.46.2 version: 8.46.2(eslint@9.36.0(jiti@2.6.1))(typescript@5.9.3) diff --git a/readme.md b/readme.md index 39fde471c..54f7692b2 100644 --- a/readme.md +++ b/readme.md @@ -193,6 +193,7 @@ module.exports = { | [sort-named-imports](https://perfectionist.dev/rules/sort-named-imports) | Enforce sorted named imports | 🔧 | | [sort-object-types](https://perfectionist.dev/rules/sort-object-types) | Enforce sorted object types | 🔧 | | [sort-objects](https://perfectionist.dev/rules/sort-objects) | Enforce sorted objects | 🔧 | +| [sort-regexp](https://perfectionist.dev/rules/sort-regexp) | Enforce sorted regular expressions | 🔧 | | [sort-sets](https://perfectionist.dev/rules/sort-sets) | Enforce sorted Set elements | 🔧 | | [sort-switch-case](https://perfectionist.dev/rules/sort-switch-case) | Enforce sorted switch case statements | 🔧 | | [sort-union-types](https://perfectionist.dev/rules/sort-union-types) | Enforce sorted union types | 🔧 | diff --git a/rules/sort-regexp.ts b/rules/sort-regexp.ts new file mode 100644 index 000000000..ba4d56081 --- /dev/null +++ b/rules/sort-regexp.ts 
@@ -0,0 +1,416 @@
+import type { CharacterClass } from '@eslint-community/regexpp/ast'
+import type { TSESTree } from '@typescript-eslint/types'
+
+import { parseRegExpLiteral, visitRegExpAST } from '@eslint-community/regexpp'
+
+import type { SortingNode } from '../types/sorting-node'
+import type { Options } from './sort-regexp/types'
+
+import {
+  buildCustomGroupsArrayJsonSchema,
+  commonJsonSchemas,
+  groupsJsonSchema,
+} from '../utils/common-json-schemas'
+import { alternativesContainUnnamedCapturingGroups } from './sort-regexp/alternatives-contain-unnamed-capturing-groups'
+import { buildGetCustomGroupOverriddenOptionsFunction } from '../utils/get-custom-groups-compare-options'
+import { validateGeneratedGroupsConfiguration } from '../utils/validate-generated-groups-configuration'
+import { getCharacterClassElementSortKey } from './sort-regexp/get-character-class-element-sort-key'
+import {
+  singleCustomGroupJsonSchema,
+  allModifiers,
+  allSelectors,
+} from './sort-regexp/types'
+import { createCharacterClassSortingNode } from './sort-regexp/create-character-class-sorting-node'
+import { validateCustomSortConfiguration } from '../utils/validate-custom-sort-configuration'
+import { hasShadowingAlternatives } from './sort-regexp/has-shadowing-alternatives'
+import { createFlagSortingNodes } from './sort-regexp/create-flag-sorting-nodes'
+import { getEslintDisabledLines } from '../utils/get-eslint-disabled-lines'
+import { isCapturingContext } from './sort-regexp/is-capturing-context'
+import { createSortingNode } from './sort-regexp/create-sorting-node'
+import { createNodeIndexMap } from '../utils/create-node-index-map'
+import { sortNodesByGroups } from '../utils/sort-nodes-by-groups'
+import { createEslintRule } from '../utils/create-eslint-rule'
+import { reportAllErrors } from '../utils/report-all-errors'
+import { ORDER_ERROR } from '../utils/report-errors'
+import { getSettings } from '../utils/get-settings'
+import { isSortable } from '../utils/is-sortable'
+import { sortNodes } from '../utils/sort-nodes'
+import { pairwise } from '../utils/pairwise'
+import { complete } from '../utils/complete'
+import { compare } from '../utils/compare'
+
+type SortRegExpSortingNode = SortingNode // NOTE(review): generic type arguments look stripped in this view (here, `Required`, `createEslintRule`, `WeakSet`) — confirm against the repository
+
+type ResolvedOptions = Required
+
+type MessageId = 'unexpectedRegExpOrder'
+
+/**
+ * Retrieves the original source text for a character class element.
+ *
+ * The element offsets are used directly as indexes into the literal's raw
+ * text.
+ *
+ * @param parameters - Character class element metadata.
+ * @returns Slice of the literal raw string covering the element.
+ */
+function getCharacterClassElementRawText({
+  literalRaw,
+  element,
+}: {
+  element: CharacterClass['elements'][number]
+  literalRaw: string
+}): string {
+  return literalRaw.slice(element.start, element.end)
+}
+// Baseline option values: handed to `complete` together with the user options and settings, and published as the rule's default options.
+let defaultOptions: ResolvedOptions = {
+  fallbackSort: { type: 'unsorted' },
+  specialCharacters: 'keep',
+  type: 'alphabetical',
+  ignoreAlias: false,
+  ignoreCase: true,
+  customGroups: [],
+  locales: 'en-US',
+  alphabet: '',
+  order: 'asc',
+  groups: [],
+}
+// Rule definition: checks the order of regex flags, alternation branches and character-class elements.
+export default createEslintRule({
+  create: context => {
+    let settings = getSettings(context.settings)
+    let options = complete(
+      context.options.at(0),
+      settings,
+      defaultOptions,
+    )
+
+    validateCustomSortConfiguration(options)
+    validateGeneratedGroupsConfiguration({
+      selectors: allSelectors,
+      modifiers: allModifiers,
+      options,
+    })
+
+    let { sourceCode, id } = context
+    let literalsWithShadowing = new WeakSet() // literals whose alternation was skipped because of shadowing; their character classes are skipped as well
+    /**
+     * Checks a single literal: flag order first, then alternation branches,
+     * then character-class elements. Literals without a `regex` property are
+     * ignored.
+     *
+     * @param literalNode - Literal node visited by ESLint.
+     */
+    function handleLiteral(literalNode: TSESTree.Literal): void {
+      if (!('regex' in literalNode)) { // not a regular expression literal
+        return
+      }
+
+      let eslintDisabledLines = getEslintDisabledLines({
+        ruleName: id,
+        sourceCode,
+      })
+
+      let ast = parseRegExpLiteral(literalNode.raw)
+
+      if (literalNode.regex.flags) { // step 1: flags
+        let flagNodes = createFlagSortingNodes({
+          flags: literalNode.regex.flags,
+          eslintDisabledLines,
+          literalNode,
+        })
+
+        let sortedFlagNodes = sortNodes({
+          fallbackSortNodeValueGetter: null,
+          ignoreEslintDisabledNodes: false,
+          nodeValueGetter: null,
+          nodes: flagNodes,
+          options,
+        })
+        let originalFlags = flagNodes.map(node => node.name).join('')
+        let sortedFlags = sortedFlagNodes.map(node => node.name).join('')
+
+        if (originalFlags !== sortedFlags) {
+          let nodeIndexMap = createNodeIndexMap(sortedFlagNodes)
+          let misplacedPair: {
+            right: SortRegExpSortingNode
+            left: SortRegExpSortingNode
+          } = { // fallback pair, kept when the scan below finds no out-of-order neighbours
+            right: sortedFlagNodes.at(0)!,
+            left: flagNodes.at(-1)!,
+          }
+
+          let isMisplacedPairFound = false
+
+          pairwise(flagNodes, (left, right) => {
+            if (isMisplacedPairFound || !left) { // only the first out-of-order adjacent pair is reported
+              return
+            }
+
+            let leftIndex = nodeIndexMap.get(left)!
+            let rightIndex = nodeIndexMap.get(right)!
+
+            if (leftIndex > rightIndex) {
+              misplacedPair = { right, left }
+              isMisplacedPairFound = true
+            }
+          })
+
+          let { right, left } = misplacedPair
+
+          context.report({
+            fix: fixer => {
+              let flagsLength = literalNode.regex.flags.length
+              let flagsStart = literalNode.range[1] - flagsLength // the flags are the last characters of the literal
+              return fixer.replaceTextRange(
+                [flagsStart, literalNode.range[1]],
+                sortedFlags,
+              )
+            },
+            data: {
+              right: right.name,
+              left: left.name,
+            },
+            messageId: 'unexpectedRegExpOrder',
+            node: literalNode,
+          })
+        }
+      }
+      // Step 2: alternation branches.
+      visitRegExpAST(ast, {
+        onAlternativeLeave(alternative) {
+          if (
+            !isCapturingContext(alternative.parent) ||
+            !isSortable(alternative.parent.alternatives)
+          ) {
+            return
+          }
+
+          if (alternative !== alternative.parent.alternatives.at(-1)) { // handle each alternation once, when its last branch is left
+            return
+          }
+          // Presumably skipped because reordering would renumber unnamed captures — TODO confirm.
+          if (
+            alternativesContainUnnamedCapturingGroups(
+              alternative.parent.alternatives,
+            )
+          ) {
+            return
+          }
+
+          if (
+            hasShadowingAlternatives({
+              alternatives: alternative.parent.alternatives,
+              flags: literalNode.regex.flags,
+            })
+          ) {
+            literalsWithShadowing.add(literalNode) // remembered so the character-class pass skips this literal too
+            return
+          }
+
+          let nodes = alternative.parent.alternatives.map(currentAlternative =>
+            createSortingNode({
+              alternative: currentAlternative,
+              eslintDisabledLines,
+              literalNode,
+              sourceCode,
+              options,
+            }),
+          )
+
+          let getOptionsByGroupIndex =
+            buildGetCustomGroupOverriddenOptionsFunction(options)
+
+          function sortAlternatives(
+            ignoreEslintDisabledNodes: boolean,
+          ): SortRegExpSortingNode[] {
+            return sortNodesByGroups({
+              ignoreEslintDisabledNodes,
+              getOptionsByGroupIndex,
+              groups: options.groups,
+              nodes,
+            })
+          }
+
+          let sortedAlternatives = sortAlternatives(false)
+          let isAlreadySorted = nodes.every(
+            (sortingNode, index) =>
+              sortingNode.node === sortedAlternatives.at(index)?.node,
+          )
+
+          if (isAlreadySorted) {
+            return
+          }
+
+          reportAllErrors({
+            availableMessageIds: {
+              unexpectedGroupOrder: 'unexpectedRegExpOrder',
+              unexpectedOrder: 'unexpectedRegExpOrder',
+            },
+            options: {
+              customGroups: options.customGroups,
+              newlinesBetween: 'ignore',
+              groups: options.groups,
+            },
+            sortNodesExcludingEslintDisabled: sortAlternatives,
+            sourceCode,
+            context,
+            nodes,
+          })
+        },
+      })
+      // Step 3: character classes. Runs as a second traversal, after the alternation pass has finished filling `literalsWithShadowing`.
+      visitRegExpAST(ast, {
+        onCharacterClassLeave(characterClass) {
+          if (literalsWithShadowing.has(literalNode)) {
+            return
+          }
+
+          let { elements, negate, start, end } = characterClass
+          if (!isSortable(elements)) {
+            return
+          }
+
+          if (options.type === 'unsorted') {
+            return
+          }
+
+          if (literalNode.regex.flags.includes('v')) { // classes in literals using the `v` flag are left untouched
+            return
+          }
+
+          let sortedElements = [...elements].toSorted((a, b) => { // NOTE(review): `toSorted` already returns a copy; the spread looks redundant
+            let aKey = getCharacterClassElementSortKey(a)
+            let bKey = getCharacterClassElementSortKey(b)
+            // Category buckets take precedence, except for line-length sorting.
+            if (
+              options.type !== 'line-length' &&
+              aKey.category !== bKey.category
+            ) {
+              let categoryDiff = aKey.category - bKey.category
+              return options.order === 'asc' ? categoryDiff : -categoryDiff
+            }
+
+            let aNode = createCharacterClassSortingNode({
+              literalNode,
+              element: a,
+            })
+            let bNode = createCharacterClassSortingNode({
+              literalNode,
+              element: b,
+            })
+
+            let comparison = compare({
+              fallbackSortNodeValueGetter: null,
+              nodeValueGetter: null,
+              a: aNode,
+              b: bNode,
+              options,
+            })
+
+            if (comparison !== 0) {
+              return comparison
+            }
+            // Tie-break on the raw source text of the two elements.
+            let rawComparison = aKey.raw.localeCompare(
+              bKey.raw,
+              options.locales,
+              {
+                sensitivity: options.ignoreCase ? 'base' : 'variant',
+                numeric: options.type === 'natural',
+              },
+            )
+
+            let rawOrderMultiplier = 1 // the tie-break is flipped for descending order, except for line-length
+            if (options.type !== 'line-length' && options.order !== 'asc') {
+              rawOrderMultiplier = -1
+            }
+
+            return rawOrderMultiplier * rawComparison
+          })
+
+          let needsSort = elements.some(
+            (element, i) => element !== sortedElements[i],
+          )
+
+          if (needsSort) {
+            let literalRaw = literalNode.raw
+            let originalRawElements = elements.map(element =>
+              getCharacterClassElementRawText({
+                literalRaw,
+                element,
+              }),
+            )
+            let sortedRawElements = sortedElements.map(element =>
+              getCharacterClassElementRawText({
+                literalRaw,
+                element,
+              }),
+            )
+
+            let mismatchIndex = originalRawElements.findIndex(
+              (raw, index) => raw !== sortedRawElements[index],
+            )
+
+            let safeMismatchIndex = Math.max(mismatchIndex, 0) // `findIndex` yields -1 when the raw texts match position by position
+
+            let left = originalRawElements[safeMismatchIndex]!
+            let right = sortedRawElements[safeMismatchIndex]!
+
+            context.report({
+              fix: fixer => {
+                let [literalStart] = literalNode.range
+                let classStart = literalStart + start
+                let classEnd = literalStart + end
+                let replacement = negate
+                  ? `[^${sortedRawElements.join('')}]`
+                  : `[${sortedRawElements.join('')}]`
+                // NOTE(review): an unescaped `^` sorted into the first slot of a non-negated class, or a `-` landing between two elements, would change what the class matches — confirm the ordering above rules this out.
+                return fixer.replaceTextRange(
+                  [classStart, classEnd],
+                  replacement,
+                )
+              },
+              data: {
+                right,
+                left,
+              },
+              messageId: 'unexpectedRegExpOrder',
+              node: literalNode,
+            })
+          }
+        },
+      })
+    }
+
+    return {
+      Literal: handleLiteral,
+    }
+  },
+  meta: {
+    schema: {
+      items: {
+        properties: {
+          ...commonJsonSchemas,
+          ignoreAlias: {
+            description: 'Controls whether to ignore alias names.',
+            type: 'boolean',
+          },
+          customGroups: buildCustomGroupsArrayJsonSchema({
+            singleCustomGroupJsonSchema,
+          }),
+          groups: groupsJsonSchema,
+        },
+        additionalProperties: false,
+        type: 'object',
+      },
+      uniqueItems: true,
+      type: 'array',
+    },
+    docs: {
+      url: 'https://perfectionist.dev/rules/sort-regexp',
+      description: 'Enforce sorted regular expressions.',
+      recommended: true,
+    },
+    messages: {
+      unexpectedRegExpOrder: ORDER_ERROR,
+    },
+    defaultOptions: [defaultOptions],
+    type: 'suggestion',
+    fixable: 'code',
+  },
+  defaultOptions: [defaultOptions],
+  name: 'sort-regexp',
+})
diff --git a/rules/sort-regexp/alternatives-contain-unnamed-capturing-groups.ts b/rules/sort-regexp/alternatives-contain-unnamed-capturing-groups.ts
new file mode 100644
index 000000000..148692e94
--- /dev/null
+++ b/rules/sort-regexp/alternatives-contain-unnamed-capturing-groups.ts
@@ -0,0 +1,71 @@
+import type { Alternative, Element } from '@eslint-community/regexpp/ast'
+
+/**
+ * Checks whether any alternative contains an unnamed capturing group.
+ *
+ * @param alternatives - Alternatives to inspect.
+ * @returns True if at least one unnamed capturing group is found.
+ */
+export function alternativesContainUnnamedCapturingGroups(
+  alternatives: Alternative[],
+): boolean {
+  return alternatives.some(alternativeContainsUnnamedCapturingGroups)
+}
+
+/**
+ * Determines whether the given element (or any nested element) contains an
+ * unnamed capturing group.
+ *
+ * @param element - Regex element to inspect.
+ * @returns True when an unnamed capturing group is found.
+ */
+function elementContainsUnnamedCapturingGroups(element: Element): boolean {
+  // A quantifier wraps exactly one element, so look straight through it.
+  if (element.type === 'Quantifier') {
+    return elementContainsUnnamedCapturingGroups(element.element)
+  }
+
+  // Alternatives nested inside the element; stays empty for leaf elements.
+  let nestedAlternatives: Alternative[] = []
+
+  if (element.type === 'CapturingGroup') {
+    if (!element.name) {
+      return true
+    }
+
+    nestedAlternatives = element.alternatives
+  } else if (element.type === 'Group') {
+    nestedAlternatives = element.alternatives
+  } else if (
+    element.type === 'Assertion' &&
+    (element.kind === 'lookahead' || element.kind === 'lookbehind')
+  ) {
+    // Only lookaround assertions are inspected for nested alternatives.
+    nestedAlternatives = element.alternatives
+  }
+
+  return nestedAlternatives.some(alternativeContainsUnnamedCapturingGroups)
+}
+
+/**
+ * Reports whether any direct element of the alternative is, or contains, an
+ * unnamed capturing group.
+ *
+ * @param alternative - Alternative whose elements are scanned.
+ * @returns True as soon as one element contains an unnamed capture.
+ */
+function alternativeContainsUnnamedCapturingGroups(
+  alternative: Alternative,
+): boolean {
+  for (let element of alternative.elements) {
+    if (elementContainsUnnamedCapturingGroups(element)) {
+      return true
+    }
+  }
+
+  return false
+}
diff --git a/rules/sort-regexp/create-character-class-sorting-node.ts b/rules/sort-regexp/create-character-class-sorting-node.ts
new file mode 100644
index 000000000..025510b18
--- /dev/null
+++ b/rules/sort-regexp/create-character-class-sorting-node.ts
@@ -0,0 +1,31 @@
+import type { CharacterClass } from '@eslint-community/regexpp/ast'
+import type { TSESTree } from '@typescript-eslint/types'
+
+import type { SortingNode } from '../../types/sorting-node'
+
+import { getCharacterClassElementSortKey } from './get-character-class-element-sort-key'
+
+/**
+ * Creates a sorting node for a character class element.
+ *
+ * @param parameters - Character class element metadata.
+ * @returns Sorting node describing the element.
+ */
+export function createCharacterClassSortingNode({
+  literalNode,
+  element,
+}: {
+  element: CharacterClass['elements'][number]
+  literalNode: TSESTree.Literal
+}): SortingNode {
+  // Only the normalized and raw parts of the sort key are needed here.
+  let { normalized, raw } = getCharacterClassElementSortKey(element)
+
+  return {
+    // Every element points at the enclosing literal and shares one group.
+    node: literalNode,
+    group: 'character-class',
+    partitionId: 0,
+    // Elements are never flagged as eslint-disabled by this helper.
+    isEslintDisabled: false,
+    // Compared by normalized value; size is the length of the raw text.
+    name: normalized,
+    size: raw.length,
+  }
+}
diff --git a/rules/sort-regexp/create-flag-sorting-nodes.ts b/rules/sort-regexp/create-flag-sorting-nodes.ts
new file mode 100644
index 000000000..3e8d7b912
--- /dev/null
+++ b/rules/sort-regexp/create-flag-sorting-nodes.ts
@@ -0,0 +1,32 @@
+import type { TSESTree } from '@typescript-eslint/types'
+
+import type { SortingNode } from '../../types/sorting-node'
+
+import { isNodeEslintDisabled } from '../../utils/is-node-eslint-disabled'
+
+/**
+ * Builds sorting nodes for every flag attached to a regular expression literal.
+ *
+ * @param parameters - Literal context alongside enabled flags.
+ * @returns Sorting nodes representing each flag.
+ */
+export function createFlagSortingNodes({
+  eslintDisabledLines,
+  literalNode,
+  flags,
+}: {
+  eslintDisabledLines: number[]
+  literalNode: TSESTree.Literal
+  flags: string
+}): SortingNode[] {
+  // Evaluated once: every flag lives on the same literal node.
+  let isEslintDisabled = isNodeEslintDisabled(literalNode, eslintDisabledLines)
+
+  // One node per flag character, all attached to the literal itself.
+  return Array.from(flags, name => ({
+    isEslintDisabled,
+    node: literalNode,
+    partitionId: 0,
+    group: 'flags',
+    size: 1,
+    name,
+  }))
+}
diff --git a/rules/sort-regexp/create-pseudo-literal-node.ts b/rules/sort-regexp/create-pseudo-literal-node.ts
new file mode 100644
index 000000000..5e8e39b37
--- /dev/null
+++ b/rules/sort-regexp/create-pseudo-literal-node.ts
@@ -0,0 +1,39 @@
+import type { Alternative } from '@eslint-community/regexpp/ast'
+import type { TSESTree } from '@typescript-eslint/types'
+import type { TSESLint } from '@typescript-eslint/utils'
+
+import { AST_NODE_TYPES } from '@typescript-eslint/types'
+
+/**
+ * Produces a pseudo literal node representing a regex alternative.
+ *
+ * @param parameters - Source literal context and alternative.
+ * @returns Literal node mirroring the alternative segment.
+ */
+export function createPseudoLiteralNode({
+  literalNode,
+  alternative,
+  sourceCode,
+}: {
+  sourceCode: TSESLint.SourceCode
+  literalNode: TSESTree.Literal
+  alternative: Alternative
+}): TSESTree.Literal {
+  // Alternative offsets are added to the start of the enclosing literal.
+  let base = literalNode.range[0]
+  let range: TSESTree.Range = [
+    base + alternative.start,
+    base + alternative.end,
+  ]
+  let [rangeStart, rangeEnd] = range
+  let start = sourceCode.getLocFromIndex(rangeStart)
+  let end = sourceCode.getLocFromIndex(rangeEnd)
+  let { raw } = alternative
+
+  // The raw text doubles as the node value; the real literal is the parent.
+  return {
+    type: AST_NODE_TYPES.Literal,
+    loc: { start, end },
+    parent: literalNode,
+    value: raw,
+    range,
+    raw,
+  } as TSESTree.Literal
+}
diff --git a/rules/sort-regexp/create-sorting-node.ts b/rules/sort-regexp/create-sorting-node.ts
new file mode 100644
index 000000000..89998f2c7
--- /dev/null
+++ b/rules/sort-regexp/create-sorting-node.ts
@@ -0,0 +1,72 @@
+import type { Alternative } from '@eslint-community/regexpp/ast'
+import type { TSESLint } from '@typescript-eslint/utils'
+import type { TSESTree } from '@typescript-eslint/types'
+
+import type { SortingNode } from '../../types/sorting-node'
+import type { Selector } from './types'
+import type { Options } from './types'
+
+import { doesCustomGroupMatch } from '../../utils/does-custom-group-match'
+import { isNodeEslintDisabled } from '../../utils/is-node-eslint-disabled'
+import { createPseudoLiteralNode } from './create-pseudo-literal-node'
+import { getAlternativeAlias } from './get-alternative-alias'
+import { computeGroup } from '../../utils/compute-group'
+
+interface CreateSortingNodeParameters {
+  sourceCode: TSESLint.SourceCode
+  literalNode: TSESTree.Literal
+  eslintDisabledLines: number[]
+  alternative: Alternative
+  options: ResolvedOptions
+}
+
+type ResolvedOptions = Required
+
+/**
+ * Builds a sortable node representation for a regex alternative.
+ *
+ * @param parameters - Alternative context with rule settings.
+ * @returns Sorting node ready for ordering logic.
+ */
+export function createSortingNode({
+  eslintDisabledLines,
+  literalNode,
+  alternative,
+  sourceCode,
+  options,
+}: CreateSortingNodeParameters): SortingNode {
+  let { raw } = alternative
+  let alias = getAlternativeAlias(alternative)
+
+  // Aliased alternatives use the `alias` selector, all others `pattern`.
+  let selector: Selector = 'pattern'
+  if (alias) {
+    selector = 'alias'
+  }
+
+  // The alias is prefixed to the compared name unless `ignoreAlias` is set.
+  let name = raw
+  if (alias && !options.ignoreAlias) {
+    name = `${alias}: ${raw}`
+  }
+
+  // NOTE(review): `elementName` receives the composed name (`alias: pattern`
+  // when an alias is kept), not the bare alias — confirm this matches the
+  // documented meaning of `elementNamePattern`.
+  let group = computeGroup({
+    customGroupMatcher: customGroup =>
+      doesCustomGroupMatch({
+        elementValue: raw,
+        selectors: [selector],
+        elementName: name,
+        modifiers: [],
+        customGroup,
+      }),
+    predefinedGroups: [selector],
+    options,
+  })
+
+  let node = createPseudoLiteralNode({
+    literalNode,
+    alternative,
+    sourceCode,
+  })
+  let [rangeStart, rangeEnd] = node.range
+
+  return {
+    isEslintDisabled: isNodeEslintDisabled(literalNode, eslintDisabledLines),
+    size: rangeEnd - rangeStart,
+    partitionId: 0,
+    group,
+    name,
+    node,
+  }
+}
diff --git a/rules/sort-regexp/does-alternative-shadow-other.ts b/rules/sort-regexp/does-alternative-shadow-other.ts
new file mode 100644
index 000000000..70542d1e2
--- /dev/null
+++ b/rules/sort-regexp/does-alternative-shadow-other.ts
@@ -0,0 +1,25 @@
+/**
+ * Detects whether one alternative shadows (is a prefix of) another.
+ *
+ * @param first - First alternative text.
+ * @param second - Second alternative text.
+ * @returns True when either alternative makes the other unreachable.
+ */
+export function doesAlternativeShadowOther(
+  first: string,
+  second: string,
+): boolean {
+  // Order the two texts by length; the shorter one is the candidate prefix.
+  let [shorter, longer] =
+    first.length <= second.length ? [first, second] : [second, first]
+
+  // An empty text is a prefix of anything, and for equal lengths a prefix
+  // match is plain equality, so one check covers every case.
+  return longer.startsWith(shorter)
+}
diff --git a/rules/sort-regexp/get-alternative-alias.ts b/rules/sort-regexp/get-alternative-alias.ts
new file mode 100644
index 000000000..35bc6bb0c
--- /dev/null
+++ b/rules/sort-regexp/get-alternative-alias.ts
@@ -0,0 +1,20 @@
+import type { Alternative } from '@eslint-community/regexpp/ast'
+
+/**
+ * Looks up the alias of an alternative: the name of its leading named
+ * capturing group, otherwise the name of the capturing group enclosing it.
+ *
+ * @param alternative - Alternative to inspect.
+ * @returns Alias name, or null when neither source provides one.
+ */
+export function getAlternativeAlias(alternative: Alternative): string | null {
+  let leading = alternative.elements[0]
+  let { parent } = alternative
+
+  // The leading element wins over the enclosing group.
+  let leadingName = leading?.type === 'CapturingGroup' ? leading.name : null
+  let parentName = parent.type === 'CapturingGroup' ? parent.name : null
+
+  return leadingName || parentName || null
+}
diff --git a/rules/sort-regexp/get-character-class-element-category.ts b/rules/sort-regexp/get-character-class-element-category.ts
new file mode 100644
index 000000000..4cf5400cb
--- /dev/null
+++ b/rules/sort-regexp/get-character-class-element-category.ts
@@ -0,0 +1,127 @@
+import type { CharacterClass } from '@eslint-community/regexpp/ast'
+
+const DIGIT_CHARACTER_PATTERN = /^\p{Nd}$/u
+const LOWERCASE_CHARACTER_PATTERN = /^\p{Ll}$/u
+const UPPERCASE_CHARACTER_PATTERN = /^\p{Lu}$/u
+const HYPHEN_CODE_POINT = '-'.codePointAt(0)!
+
+/**
+ * Maps a character class element to a sortable category bucket.
+ *
+ * @param element - Character class element to categorize.
+ * @returns Numeric category representing the element group.
+ */
+export function getCharacterClassElementCategory(
+  element: CharacterClass['elements'][number],
+): number {
+  if (element.type === 'CharacterClassRange') {
+    // A range is classified by both of its endpoints together.
+    return categorizeCodePoints([element.min.value, element.max.value])
+  }
+
+  if (element.type === 'Character') {
+    // A literal hyphen gets the highest bucket of all.
+    return element.value === HYPHEN_CODE_POINT
+      ? 5
+      : categorizeCodePoints([element.value])
+  }
+
+  if (element.type === 'CharacterSet') {
+    if (element.kind === 'digit') {
+      return 0
+    }
+
+    if (element.kind === 'word') {
+      return 2
+    }
+
+    if (element.kind === 'space') {
+      return 3
+    }
+  }
+
+  // Remaining element types and character set kinds share one bucket.
+  return 4
+}
+
+/**
+ * Buckets a group of code points: 0 when all are digits, 1 when all are
+ * uppercase letters, 2 when all are lowercase letters, 3 otherwise.
+ *
+ * @param codePoints - Code points that must agree on a bucket.
+ * @returns Category shared by every code point, or 3 when they disagree.
+ */
+function categorizeCodePoints(codePoints: number[]): number {
+  if (codePoints.every(codePoint => isDigitCharacter(codePoint))) {
+    return 0
+  }
+
+  if (codePoints.every(codePoint => isUppercaseCharacter(codePoint))) {
+    return 1
+  }
+
+  if (codePoints.every(codePoint => isLowercaseCharacter(codePoint))) {
+    return 2
+  }
+
+  return 3
+}
+
+/**
+ * Tests a code point against the lowercase-letter pattern.
+ *
+ * @param value - Code point to evaluate.
+ * @returns True when the code point is a lowercase letter.
+ */
+export function isLowercaseCharacter(value: number): boolean {
+  let character = codePointToString(value)
+
+  return LOWERCASE_CHARACTER_PATTERN.test(character)
+}
+
+/**
+ * Tests a code point against the uppercase-letter pattern.
+ *
+ * @param value - Code point to evaluate.
+ * @returns True when the code point is an uppercase letter.
+ */
+export function isUppercaseCharacter(value: number): boolean {
+  let character = codePointToString(value)
+
+  return UPPERCASE_CHARACTER_PATTERN.test(character)
+}
+
+/**
+ * Checks whether the given code point represents a digit character.
+ *
+ * @param value - Code point to inspect.
+ * @returns True when the value corresponds to a digit character. + */ +export function isDigitCharacter(value: number): boolean { + return DIGIT_CHARACTER_PATTERN.test(codePointToString(value)) +} + +/** + * Converts the provided code point into its string representation. + * + * @param value - Code point to convert. + * @returns String representation of the code point. + */ +function codePointToString(value: number): string { + return String.fromCodePoint(value) +} diff --git a/rules/sort-regexp/get-character-class-element-sort-key.ts b/rules/sort-regexp/get-character-class-element-sort-key.ts new file mode 100644 index 000000000..26f5fd7f0 --- /dev/null +++ b/rules/sort-regexp/get-character-class-element-sort-key.ts @@ -0,0 +1,26 @@ +import type { CharacterClass } from '@eslint-community/regexpp/ast' + +import { getCharacterClassElementCategory } from './get-character-class-element-category' +import { getCharacterClassElementValue } from './get-character-class-element-value' + +export interface CharacterClassElementSortKey { + normalized: string + category: number + raw: string +} + +/** + * Builds a composite key describing how a character class element should sort. + * + * @param element - Character class element to analyze. + * @returns Sort key used by character class sorting routines. 
+ */
+export function getCharacterClassElementSortKey(
+  element: CharacterClass['elements'][number],
+): CharacterClassElementSortKey {
+  return {
+    category: getCharacterClassElementCategory(element),
+    normalized: getCharacterClassElementValue(element),
+    raw: element.raw,
+  }
+}
diff --git a/rules/sort-regexp/get-character-class-element-value.ts b/rules/sort-regexp/get-character-class-element-value.ts
new file mode 100644
index 000000000..3855a6169
--- /dev/null
+++ b/rules/sort-regexp/get-character-class-element-value.ts
@@ -0,0 +1,36 @@
+import type { CharacterClass } from '@eslint-community/regexpp/ast'
+
+/**
+ * Produces a normalized representation for a character class element.
+ *
+ * Ranges become `min-max` built from the decoded code points, character sets
+ * become a backslash followed by the set kind, single characters become the
+ * decoded character, and every other element keeps its raw text.
+ *
+ * @param element - Character class element to transform.
+ * @returns Normalized string used for comparisons.
+ */
+export function getCharacterClassElementValue(
+  element: CharacterClass['elements'][number],
+): string {
+  let rawValue = element.raw
+
+  switch (element.type) {
+    case 'CharacterClassRange': {
+      rawValue = `${String.fromCodePoint(element.min.value)}-${String.fromCodePoint(
+        element.max.value,
+      )}`
+
+      break
+    }
+    case 'CharacterSet': {
+      // Only the kind is used, so a set and its negated form share one value.
+      rawValue = `\\${element.kind}`
+
+      break
+    }
+    case 'Character': {
+      rawValue = String.fromCodePoint(element.value)
+
+      break
+    }
+    /* No default. */
+  }
+
+  return rawValue
+}
diff --git a/rules/sort-regexp/get-first-character-paths.ts b/rules/sort-regexp/get-first-character-paths.ts
new file mode 100644
index 000000000..910aa6368
--- /dev/null
+++ b/rules/sort-regexp/get-first-character-paths.ts
@@ -0,0 +1,700 @@
+import type {
+  CapturingGroup,
+  CharacterClass,
+  CharacterSet,
+  Alternative,
+  Quantifier,
+  Element,
+  Group,
+} from '@eslint-community/regexpp/ast'
+
+const MAX_FIRST_CHARACTER_PATHS = 32
+
+/** Represents a deterministic path for the first character of an alternative. */
+export interface FirstCharacterPath {
+  /** Matcher that consumes the first character along the path. */
+  matcher: CharacterMatcher
+
+  /** Indicates whether additional characters must follow to complete the match. */
+  requiresMore: boolean
+
+  /**
+   * Indicates whether the alternative can consume more characters after the
+   * prefix.
+   */
+  canMatchMore: boolean
+}
+
+/** Matcher produced from a character class AST node. */
+export interface CharacterMatcherCharacterClass {
+  /** Identifies the matcher as a character class. */
+  type: 'character-class'
+
+  /** AST node that defines the character class. */
+  value: CharacterClass
+}
+
+/** Matcher produced from a character set AST node. */
+export interface CharacterMatcherCharacterSet {
+  /** Identifies the matcher as a character set. */
+  type: 'character-set'
+
+  /** AST node that defines the character set. */
+  value: CharacterSet
+}
+
+/** Matcher that describes a literal character code point. */
+export interface CharacterMatcherCharacter {
+  /** Identifies the matcher as a literal character. */
+  type: 'character'
+
+  /** Unicode code point matched by the literal. */
+  value: number
+}
+
+/**
+ * Describes a matcher capable of consuming the first character of an
+ * alternative.
+ */
+export type CharacterMatcher =
+  | CharacterMatcherCharacterClass
+  | CharacterMatcherCharacterSet
+  | CharacterMatcherCharacter
+
+/** Tracks shared analysis state while traversing the AST. */
+interface AnalysisContext {
+  /**
+   * Cache storing computed minimum lengths for AST nodes. Keyed by both
+   * elements and alternatives.
+   */
+  minLengthCache: WeakMap<Alternative | Element, LengthResult>
+
+  /**
+   * Cache storing computed maximum lengths for AST nodes. Keyed by both
+   * elements and alternatives.
+   */
+  maxLengthCache: WeakMap<Alternative | Element, LengthResult>
+
+  /** Alternatives currently on the recursion stack. */
+  minLengthActiveAlternatives: Set<Alternative>
+
+  /** Alternatives on the recursion stack for maximum-length calculation. */
+  maxLengthActiveAlternatives: Set<Alternative>
+
+  /** Indicates whether collection exceeded the maximum allowed paths. */
+  limitExceeded: boolean
+
+  /** Count of collected paths used for enforcing the limit. */
+  pathCount: number
+}
+
+/** Internal extension that includes metadata needed during traversal. */
+interface FirstCharacterPathInternal extends FirstCharacterPath {
+  /** Mirrors the public flags for convenience when mutating paths. */
+  requiresMore: boolean
+  canMatchMore: boolean
+}
+
+/** Length value, or `null` when the length cannot be determined. */
+type LengthResult = LengthInfo | null
+
+/** Saturating length: exactly 0, exactly 1, or 2 meaning "two or more". */
+type LengthInfo = 0 | 1 | 2
+
+/**
+ * Computes all deterministic first-character paths for the given alternative.
+ *
+ * A fresh analysis context is created for every call. When the number of
+ * collected paths exceeds `MAX_FIRST_CHARACTER_PATHS`, the partial result is
+ * discarded and an empty list is returned.
+ *
+ * @param alternative - Alternative to analyze.
+ * @returns Collection of first-character matchers with information whether more
+ *   characters are required afterwards.
+ */
+export function getFirstCharacterPaths(
+  alternative: Alternative,
+): FirstCharacterPath[] {
+  let context: AnalysisContext = {
+    minLengthActiveAlternatives: new Set(),
+    maxLengthActiveAlternatives: new Set(),
+    minLengthCache: new WeakMap(),
+    maxLengthCache: new WeakMap(),
+    limitExceeded: false,
+    pathCount: 0,
+  }
+
+  let paths = collectFirstCharacterPathsFromAlternative(alternative, context)
+
+  if (context.limitExceeded) {
+    return []
+  }
+
+  return paths
+}
+
+/**
+ * Computes the maximum possible length for an element.
+ *
+ * Results, including `null`, are memoized in `context.maxLengthCache`.
+ *
+ * @param element - AST element to analyze.
+ * @param context - Shared traversal context.
+ * @returns Maximum length in characters, `2` for "two or more", or `null` if
+ *   unknown.
+ */
+function getElementMaxLength(
+  element: Element,
+  context: AnalysisContext,
+): LengthResult {
+  // Defensive guard triggers only when traversal exceeded path limit earlier.
+  /* c8 ignore next 3 */
+  if (context.limitExceeded) {
+    return null
+  }
+
+  let cached = context.maxLengthCache.get(element)
+
+  // `undefined` means "not cached yet"; a cached `null` is a valid result.
+  if (cached !== undefined) {
+    return cached
+  }
+
+  let result: LengthResult = null
+
+  switch (element.type) {
+    case 'CharacterClass':
+    case 'CharacterSet':
+    case 'Character': {
+      result = 1
+      break
+    }
+    case 'CapturingGroup':
+    case 'Group': {
+      result = getGroupMaxLength(element, context)
+      break
+    }
+    case 'Backreference': {
+      result = null
+      break
+    }
+    case 'Quantifier': {
+      let innerLength = getElementMaxLength(element.element, context)
+
+      if (innerLength === null) {
+        result = null
+        break
+      }
+
+      // Numerical sentinels are unreachable with current AST inputs.
+      /* c8 ignore start */
+      if (innerLength === 0 || element.max === 0) {
+        result = 0
+        break
+      }
+
+      if (element.max === Infinity) {
+        result = 2
+        break
+      }
+      /* c8 ignore stop */
+
+      result = multiplyLength(innerLength, element.max)
+      break
+    }
+    case 'Assertion': {
+      result = 0
+      break
+    }
+    default: {
+      result = null
+    }
+  }
+
+  context.maxLengthCache.set(element, result)
+
+  return result
+}
+
+/**
+ * Collects deterministic first-character paths that originate from the provided
+ * element.
+ *
+ * @param element - AST element to analyze.
+ * @param context - Shared traversal context.
+ * @returns Paths that can begin with the provided element.
+ */
+function collectFirstCharacterPathsFromElement(
+  element: Element,
+  context: AnalysisContext,
+): FirstCharacterPathInternal[] {
+  switch (element.type) {
+    case 'CharacterClass': {
+      // Classes flagged with `unicodeSets` yield no paths.
+      if (element.unicodeSets) {
+        return []
+      }
+
+      return [
+        {
+          matcher: { type: 'character-class', value: element },
+          requiresMore: false,
+          canMatchMore: false,
+        },
+      ]
+    }
+    case 'CapturingGroup':
+    case 'Group': {
+      // A group contributes the paths of each of its alternatives.
+      return element.alternatives.flatMap(alternative =>
+        collectFirstCharacterPathsFromAlternative(alternative, context),
+      )
+    }
+    case 'CharacterSet': {
+      // Property sets that carry `strings` yield no paths.
+      if (element.kind === 'property' && element.strings) {
+        return []
+      }
+
+      return [
+        {
+          matcher: { type: 'character-set', value: element },
+          requiresMore: false,
+          canMatchMore: false,
+        },
+      ]
+    }
+    case 'Quantifier': {
+      return collectFirstCharacterPathsFromQuantifier(element, context)
+    }
+    case 'Character': {
+      return [
+        {
+          matcher: { value: element.value, type: 'character' },
+          requiresMore: false,
+          canMatchMore: false,
+        },
+      ]
+    }
+    default: {
+      // Every other element type contributes no paths.
+      return []
+    }
+  }
+}
+
+/**
+ * Collects all first-character paths for an alternative.
+ *
+ * Elements are visited left to right. Assertions are skipped, and the walk
+ * stops after the first element whose minimum length is not zero or once the
+ * path limit has been exceeded.
+ *
+ * @param alternative - Alternative whose elements should be inspected.
+ * @param context - Shared traversal context.
+ * @returns Paths describing all deterministic prefixes.
+ */
+function collectFirstCharacterPathsFromAlternative(
+  alternative: Alternative,
+  context: AnalysisContext,
+): FirstCharacterPathInternal[] {
+  let results: FirstCharacterPathInternal[] = []
+  let { elements } = alternative
+
+  for (let index = 0; index < elements.length; index++) {
+    if (context.limitExceeded) {
+      break
+    }
+
+    let element = elements[index]!
+
+    if (element.type === 'Assertion') {
+      continue
+    }
+
+    let elementPaths = collectFirstCharacterPathsFromElement(element, context)
+
+    if (elementPaths.length > 0) {
+      // Length bounds of everything that follows the current element.
+      let restLength = getElementsMinLength(elements, index + 1, context)
+      let restMaxLength = getElementsMaxLength(elements, index + 1, context)
+
+      // Paths are dropped when the minimum length of the rest is unknown.
+      if (restLength !== null) {
+        // An unknown (`null`) maximum also counts as "can match more".
+        let restCanMatchMore = restMaxLength !== 0
+
+        for (let path of elementPaths) {
+          addPath(results, context, {
+            canMatchMore: path.canMatchMore || restCanMatchMore,
+            requiresMore: path.requiresMore || restLength > 0,
+            matcher: path.matcher,
+          })
+        }
+      }
+    }
+
+    // Later elements are only considered while this one can match empty.
+    if (!canElementMatchEmpty(element, context)) {
+      break
+    }
+  }
+
+  return results
+}
+
+/**
+ * Expands quantifiers into their potential first-character paths.
+ *
+ * The paths of the quantified element are reused with their flags widened:
+ * `requiresMore` when more than one iteration of a non-empty element is
+ * required, `canMatchMore` when more than one iteration is allowed and the
+ * element's maximum length is not zero.
+ *
+ * @param quantifier - Quantifier node to analyze.
+ * @param context - Shared traversal context.
+ * @returns Paths contributed by the quantified expression.
+ */
+function collectFirstCharacterPathsFromQuantifier(
+  quantifier: Quantifier,
+  context: AnalysisContext,
+): FirstCharacterPathInternal[] {
+  let innerPaths = collectFirstCharacterPathsFromElement(
+    quantifier.element,
+    context,
+  )
+
+  if (innerPaths.length === 0 || context.limitExceeded) {
+    return []
+  }
+
+  // No paths are produced when the inner minimum length is unknown.
+  let innerMinLength = getElementMinLength(quantifier.element, context)
+  if (innerMinLength === null) {
+    return []
+  }
+
+  let innerMaxLength = getElementMaxLength(quantifier.element, context)
+  let requiresAdditionalIterations = quantifier.min > 1 && innerMinLength > 0
+  // An unknown (`null`) maximum is treated as able to consume characters.
+  let elementCanConsumeCharacters = innerMaxLength !== 0
+  let allowsAdditionalIterations =
+    elementCanConsumeCharacters &&
+    (quantifier.max === Infinity || quantifier.max > 1)
+
+  return innerPaths.map(path => ({
+    requiresMore: path.requiresMore || requiresAdditionalIterations,
+    canMatchMore: path.canMatchMore || allowsAdditionalIterations,
+    matcher: path.matcher,
+  }))
+}
+
+/**
+ * Computes the minimum possible length for the provided element.
+ *
+ * @param element - AST element to analyze.
+ * @param context - Shared traversal context.
+ * @returns Minimum length in characters, `2` for "two or more", or `null` if
+ *   unknown.
+ */
+function getElementMinLength(
+  element: Element,
+  context: AnalysisContext,
+): LengthResult {
+  // Once the path limit was exceeded, no further lengths are computed.
+  if (context.limitExceeded) {
+    return null
+  }
+
+  let cached = context.minLengthCache.get(element)
+
+  // `undefined` means "not cached yet"; a cached `null` is a valid result.
+  if (cached !== undefined) {
+    return cached
+  }
+
+  let result: LengthResult = null
+
+  switch (element.type) {
+    case 'CharacterClass':
+    case 'CharacterSet':
+    case 'Character': {
+      result = 1
+      break
+    }
+    case 'CapturingGroup':
+    case 'Group': {
+      result = getGroupMinLength(element, context)
+      break
+    }
+    case 'Backreference': {
+      result = null
+      break
+    }
+    case 'Quantifier': {
+      let innerLength = getElementMinLength(element.element, context)
+
+      // The minimum repetition count scales the inner minimum length.
+      result = multiplyLength(innerLength, element.min)
+      break
+    }
+    case 'Assertion': {
+      result = 0
+      break
+    }
+    default: {
+      result = null
+    }
+  }
+
+  context.minLengthCache.set(element, result)
+
+  return result
+}
+
+/**
+ * Computes the maximum possible length for an alternative.
+ *
+ * The result is memoized in `context.maxLengthCache`. While an alternative is
+ * being measured it is tracked in `context.maxLengthActiveAlternatives`, and a
+ * re-entrant request for the same alternative yields `null`.
+ *
+ * @param alternative - Alternative whose elements should be measured.
+ * @param context - Shared traversal context.
+ * @returns Maximum length for the entire alternative.
+ */
+function getAlternativeMaxLength(
+  alternative: Alternative,
+  context: AnalysisContext,
+): LengthResult {
+  let cached = context.maxLengthCache.get(alternative)
+
+  // Cache reuse only occurs for recursive alternatives, which tests do not create.
+  /* c8 ignore next 3 */
+  if (cached !== undefined) {
+    return cached
+  }
+
+  if (context.maxLengthActiveAlternatives.has(alternative)) {
+    return null
+  }
+
+  context.maxLengthActiveAlternatives.add(alternative)
+
+  let length = getElementsMaxLength(alternative.elements, 0, context)
+
+  context.maxLengthActiveAlternatives.delete(alternative)
+  context.maxLengthCache.set(alternative, length)
+
+  return length
+}
+
+/**
+ * Computes the minimum possible length for an alternative.
+ *
+ * The result is memoized in `context.minLengthCache`. While an alternative is
+ * being measured it is tracked in `context.minLengthActiveAlternatives`, and a
+ * re-entrant request for the same alternative yields `null`.
+ *
+ * @param alternative - Alternative whose elements should be measured.
+ * @param context - Shared traversal context.
+ * @returns Minimum length for the entire alternative.
+ */
+function getAlternativeMinLength(
+  alternative: Alternative,
+  context: AnalysisContext,
+): LengthResult {
+  let cached = context.minLengthCache.get(alternative)
+
+  if (cached !== undefined) {
+    return cached
+  }
+
+  if (context.minLengthActiveAlternatives.has(alternative)) {
+    return null
+  }
+
+  context.minLengthActiveAlternatives.add(alternative)
+
+  let length = getElementsMinLength(alternative.elements, 0, context)
+
+  context.minLengthActiveAlternatives.delete(alternative)
+  context.minLengthCache.set(alternative, length)
+
+  return length
+}
+
+/**
+ * Computes the maximum length of a suffix of elements.
+ *
+ * @param elements - Sequence of elements belonging to an alternative.
+ * @param startIndex - Index from which the suffix begins.
+ * @param context - Shared traversal context.
+ * @returns Maximum length for the suffix.
+ */
+function getElementsMaxLength(
+  elements: Alternative['elements'],
+  startIndex: number,
+  context: AnalysisContext,
+): LengthResult {
+  let length: LengthResult = 0
+
+  for (let index = startIndex; index < elements.length; index++) {
+    let element = elements[index]!
+    let elementLength = getElementMaxLength(element, context)
+
+    length = addLengths(length, elementLength)
+
+    // An unknown element length makes the whole suffix unknown.
+    if (length === null) {
+      return null
+    }
+
+    // 2 is the saturation value, so the remaining elements are not visited.
+    if (length === 2) {
+      return 2
+    }
+  }
+
+  return length
+}
+
+/**
+ * Computes the minimum length of a suffix of elements.
+ *
+ * @param elements - Sequence of elements belonging to an alternative.
+ * @param startIndex - Index from which the suffix begins.
+ * @param context - Shared traversal context.
+ * @returns Minimum length for the suffix.
+ */
+function getElementsMinLength(
+  elements: Alternative['elements'],
+  startIndex: number,
+  context: AnalysisContext,
+): LengthResult {
+  let length: LengthResult = 0
+
+  for (let index = startIndex; index < elements.length; index++) {
+    let element = elements[index]!
+    let elementLength = getElementMinLength(element, context)
+
+    length = addLengths(length, elementLength)
+
+    // An unknown element length makes the whole suffix unknown.
+    if (length === null) {
+      return null
+    }
+
+    // 2 is the saturation value, so the remaining elements are not visited.
+    if (length === 2) {
+      return 2
+    }
+  }
+
+  return length
+}
+
+/**
+ * Computes the minimum length among the alternatives contained in a group.
+ *
+ * @param group - Capturing or non-capturing group to analyze.
+ * @param context - Shared traversal context.
+ * @returns Minimum length across the group's alternatives.
+ */
+function getGroupMinLength(
+  group: CapturingGroup | Group,
+  context: AnalysisContext,
+): LengthResult {
+  // Start from the largest tracked value and lower it per alternative.
+  let minLength: LengthResult = 2
+
+  for (let alternative of group.alternatives) {
+    let alternativeLength = getAlternativeMinLength(alternative, context)
+
+    if (alternativeLength === null) {
+      return null
+    }
+
+    if (alternativeLength < minLength) {
+      minLength = alternativeLength
+    }
+
+    // Zero cannot be lowered further, so the remaining alternatives are skipped.
+    if (minLength === 0) {
+      break
+    }
+  }
+
+  return minLength
+}
+
+/**
+ * Computes the maximum length among the alternatives contained in a group.
+ *
+ * @param group - Capturing or non-capturing group to analyze.
+ * @param context - Shared traversal context.
+ * @returns Maximum length across the group's alternatives.
+ */
+function getGroupMaxLength(
+  group: CapturingGroup | Group,
+  context: AnalysisContext,
+): LengthResult {
+  let longest: LengthInfo = 0
+
+  for (let alternative of group.alternatives) {
+    let candidate = getAlternativeMaxLength(alternative, context)
+
+    // One alternative of unknown length makes the whole group unknown.
+    if (candidate === null) {
+      return null
+    }
+
+    longest = candidate > longest ? candidate : longest
+
+    // 2 is the largest tracked value, so the remaining alternatives are skipped.
+    if (longest === 2) {
+      return 2
+    }
+  }
+
+  return longest
+}
+
+/* c8 ignore start */
+/**
+ * Scales a length by a repetition count inside the saturating 0/1/2 space.
+ *
+ * Used with a quantifier's minimum count for minimum lengths and with its
+ * maximum count for maximum lengths.
+ *
+ * @param length - Length of the repeated element, or `null` when unknown.
+ * @param count - Number of repetitions.
+ * @returns `null` for an unknown length, `0` when either operand is zero, `1`
+ *   only for a length of one repeated once, and `2` otherwise.
+ */
+function multiplyLength(length: LengthResult, count: number): LengthResult {
+  if (length === null) {
+    return null
+  }
+
+  if (length === 0 || count === 0) {
+    return 0
+  }
+
+  // Only "one character, exactly once" stays below the saturation value.
+  return length === 1 && count === 1 ? 1 : 2
+}
+/* c8 ignore stop */
+
+/**
+ * Appends a path to the results and updates the shared path counter.
+ *
+ * The path is always stored; once the counter passes
+ * `MAX_FIRST_CHARACTER_PATHS` the context is flagged as over the limit.
+ *
+ * @param results - Accumulated path list.
+ * @param context - Shared traversal context.
+ * @param path - Path to add.
+ */
+function addPath(
+  results: FirstCharacterPathInternal[],
+  context: AnalysisContext,
+  path: FirstCharacterPathInternal,
+): void {
+  results.push(path)
+  context.pathCount += 1
+
+  let isOverLimit = context.pathCount > MAX_FIRST_CHARACTER_PATHS
+
+  if (isOverLimit) {
+    context.limitExceeded = true
+  }
+}
+
+/**
+ * Adds two length values inside the saturating 0/1/2 space.
+ *
+ * Used when summing both minimum and maximum lengths of element sequences.
+ *
+ * @param a - First length operand.
+ * @param b - Second length operand.
+ * @returns `null` when either operand is unknown, otherwise the sum capped
+ *   at `2`.
+ */
+function addLengths(a: LengthResult, b: LengthResult): LengthResult {
+  if (a === null || b === null) {
+    return null
+  }
+
+  // Operands are 0, 1 or 2, so any total of two or more saturates at 2.
+  let total = a + b
+
+  return total >= 2 ? 2 : (total as LengthInfo)
+}
+
+/**
+ * Tells whether an element can match without consuming any character.
+ *
+ * @param element - AST element to inspect.
+ * @param context - Shared traversal context.
+ * @returns True when the element's minimum length is exactly zero.
+ */
+function canElementMatchEmpty(
+  element: Element,
+  context: AnalysisContext,
+): boolean {
+  return getElementMinLength(element, context) === 0
+}
diff --git a/rules/sort-regexp/has-shadowing-alternatives.ts b/rules/sort-regexp/has-shadowing-alternatives.ts
new file mode 100644
index 000000000..f39021ef4
--- /dev/null
+++ b/rules/sort-regexp/has-shadowing-alternatives.ts
@@ -0,0 +1,518 @@
+import type {
+  CharacterUnicodePropertyCharacterSet,
+  CharacterClassElement,
+  CharacterClassRange,
+  CharacterClass,
+  CharacterSet,
+  Alternative,
+} from '@eslint-community/regexpp/ast'
+
+import type {
+  CharacterMatcherCharacterClass,
+  CharacterMatcherCharacterSet,
+  FirstCharacterPath,
+  CharacterMatcher,
+} from './get-first-character-paths'
+
+import {
+  isLowercaseCharacter,
+  isUppercaseCharacter,
+  isDigitCharacter,
+} from './get-character-class-element-category'
+import { doesAlternativeShadowOther } from './does-alternative-shadow-other'
+import { getFirstCharacterPaths } from './get-first-character-paths'
+
+/** Flag-derived settings consulted while evaluating matchers. */
+interface MatcherEvaluationContext {
+  /** True when the flags string contains `s`. */
+  dotAll: boolean
+}
+
+/**
+ * Checks whether the provided alternatives contain shadowing pairs.
+ *
+ * @param parameters - Alternatives to analyze.
+ * @returns True when at least one alternative shadows another one.
+ */
+export function hasShadowingAlternatives({
+  alternatives,
+  flags,
+}: {
+  alternatives: Alternative[]
+  flags: string
+}): boolean {
+  // Step 1: an alternative that starts with a negated character class
+  // (optionally quantified) is always reported as shadowing.
+  let hasNegatedCharacterClassAlternative = alternatives.some(alternative => {
+    let firstElement = alternative.elements.at(0)
+
+    if (!firstElement) {
+      return false
+    }
+
+    if (
+      firstElement.type === 'Quantifier' &&
+      firstElement.element.type === 'CharacterClass'
+    ) {
+      return firstElement.element.negate
+    }
+
+    return firstElement.type === 'CharacterClass' && firstElement.negate
+  })
+
+  if (hasNegatedCharacterClassAlternative) {
+    return true
+  }
+
+  // Step 2: compare the raw text of every pair of alternatives.
+  let rawAlternatives = alternatives.map(alternative => alternative.raw)
+
+  for (let index = 0; index < rawAlternatives.length; index++) {
+    let current = rawAlternatives[index]!
+
+    for (let offset = index + 1; offset < rawAlternatives.length; offset++) {
+      let other = rawAlternatives[offset]!
+
+      if (doesAlternativeShadowOther(current, other)) {
+        return true
+      }
+    }
+  }
+
+  // Step 3: compare the first-character matchers of every pair.
+  let matcherContext: MatcherEvaluationContext = {
+    dotAll: flags.includes('s'),
+  }
+
+  return hasFirstCharacterShadowing({
+    matcherContext,
+    alternatives,
+  })
+}
+
+/**
+ * Detects character classes that list a character set together with its
+ * negated counterpart (same identifier, opposite `negate`).
+ *
+ * @param characterClass - Character class to inspect.
+ * @returns True when such a complementary pair is present.
+ */
+function isComplementaryCharacterClass(
+  characterClass: CharacterClass,
+): boolean {
+  // Unicode set aware classes never map into character-class matchers.
+  /* c8 ignore next 3 */
+  if (characterClass.unicodeSets) {
+    return false
+  }
+
+  let { elements } = characterClass as {
+    elements?: CharacterClass['elements']
+  }
+
+  if (!elements) {
+    return false
+  }
+
+  // Empty-negated classes (e.g. `[^]`) are normalized before reaching here.
+  /* c8 ignore next 3 */
+  if (characterClass.negate && elements.length === 0) {
+    return true
+  }
+
+  // Maps a character-set identifier to the `negate` flag seen first.
+  let seen = new Map<string, boolean>()
+
+  for (let element of elements) {
+    if (element.type !== 'CharacterSet') {
+      continue
+    }
+
+    let identifier = getCharacterSetIdentifier(element)
+
+    // String-based property escapes are filtered earlier.
+    /* c8 ignore next 3 */
+    if (!identifier) {
+      continue
+    }
+
+    let previousNegation = seen.get(identifier)
+
+    if (previousNegation === undefined) {
+      seen.set(identifier, element.negate)
+      continue
+    }
+
+    if (previousNegation !== element.negate) {
+      return true
+    }
+  }
+
+  return false
+}
+
+/**
+ * Checks one direction of first-character shadowing between two path lists.
+ *
+ * Only "longer" paths that require or allow more characters are considered,
+ * and only "shorter" paths that neither require nor allow more characters and
+ * whose matcher is not a literal character. A non-literal longer matcher
+ * shadows only against a wildcard shorter matcher; a literal longer matcher
+ * shadows when the shorter matcher contains that code point.
+ *
+ * @param parameters - Path lists and the matcher evaluation context.
+ * @returns True when a shadowing combination is found.
+ */
+function hasShadowingDirection({
+  matcherContext,
+  shorterPaths,
+  longerPaths,
+}: {
+  matcherContext: MatcherEvaluationContext
+  shorterPaths: FirstCharacterPath[]
+  longerPaths: FirstCharacterPath[]
+}): boolean {
+  for (let longerPath of longerPaths) {
+    if (!longerPath.requiresMore && !longerPath.canMatchMore) {
+      continue
+    }
+
+    let longerNeedsWildcardOverlap = longerPath.matcher.type !== 'character'
+
+    for (let shorterPath of shorterPaths) {
+      if (shorterPath.requiresMore) {
+        continue
+      }
+
+      if (shorterPath.canMatchMore) {
+        continue
+      }
+
+      if (shorterPath.matcher.type === 'character') {
+        continue
+      }
+
+      if (longerNeedsWildcardOverlap) {
+        if (isWildcardMatcher(shorterPath.matcher)) {
+          return true
+        }
+
+        continue
+      }
+
+      if (
+        doMatchersOverlap({
+          right: shorterPath.matcher,
+          left: longerPath.matcher,
+          matcherContext,
+        })
+      ) {
+        return true
+      }
+    }
+  }
+
+  return false
+}
+
+/**
+ * Tests whether a character set matches the given code point.
+ *
+ * @param parameters - Character set, code point and evaluation context.
+ * @returns True or false when decidable, `null` when it cannot be determined.
+ */
+function characterSetContainsCodePoint({
+  matcherContext,
+  characterSet,
+  codePoint,
+}: {
+  matcherContext: MatcherEvaluationContext
+  characterSet: CharacterSet
+  codePoint: number
+}): boolean | null {
+  switch (characterSet.kind) {
+    case 'property': {
+      if (characterSet.strings) {
+        return null
+      }
+
+      let matches = unicodePropertyContainsCodePoint({
+        propertySet: characterSet,
+        codePoint,
+      })
+
+      if (matches === null) {
+        return null
+      }
+
+      return applyNegation(characterSet, matches)
+    }
+    case 'digit': {
+      // NOTE(review): `\d` only matches ASCII 0-9; if the imported
+      // isDigitCharacter accepts any Unicode decimal digit this reports extra
+      // overlaps - confirm the over-approximation is intended.
+      return applyNegation(characterSet, isDigitCharacter(codePoint))
+    }
+    case 'space': {
+      return applyNegation(characterSet, isWhitespaceCharacter(codePoint))
+    }
+    case 'word': {
+      return applyNegation(characterSet, isWordCharacter(codePoint))
+    }
+    case 'any': {
+      return matchesAnyCharacterSet({
+        matcherContext,
+        codePoint,
+      })
+    }
+    default: {
+      return null
+    }
+  }
+}
+
+/**
+ * Compares the first-character paths of every pair of alternatives, in both
+ * directions. Alternatives without any path are skipped.
+ *
+ * @param parameters - Alternatives and the matcher evaluation context.
+ * @returns True when any pair shadows in either direction.
+ */
+function hasFirstCharacterShadowing({
+  matcherContext,
+  alternatives,
+}: {
+  matcherContext: MatcherEvaluationContext
+  alternatives: Alternative[]
+}): boolean {
+  let firstCharacterPaths = alternatives.map(alternative =>
+    getFirstCharacterPaths(alternative),
+  )
+
+  for (let index = 0; index < firstCharacterPaths.length; index++) {
+    let current = firstCharacterPaths[index]!
+
+    if (current.length === 0) {
+      continue
+    }
+
+    for (
+      let offset = index + 1;
+      offset < firstCharacterPaths.length;
+      offset++
+    ) {
+      let other = firstCharacterPaths[offset]!
+
+      if (other.length === 0) {
+        continue
+      }
+
+      if (
+        hasShadowingDirection({
+          longerPaths: current,
+          shorterPaths: other,
+          matcherContext,
+        }) ||
+        hasShadowingDirection({
+          shorterPaths: current,
+          longerPaths: other,
+          matcherContext,
+        })
+      ) {
+        return true
+      }
+    }
+  }
+
+  return false
+}
+
+/**
+ * Tests a code point against the non-negated form of a Unicode property
+ * escape. Negation is left to the caller.
+ *
+ * @param parameters - Property set node and code point.
+ * @returns Match result, or `null` when the property regex cannot be built.
+ */
+function unicodePropertyContainsCodePoint({
+  propertySet,
+  codePoint,
+}: {
+  propertySet: CharacterUnicodePropertyCharacterSet
+  codePoint: number
+}): boolean | null {
+  // The compiled regex depends only on key and value, so `negate` is not part
+  // of the cache key.
+  let cacheKey = `${propertySet.key}:${propertySet.value ?? ''}`
+  let cached = unicodePropertyRegexCache.get(cacheKey)
+
+  if (!cached) {
+    try {
+      let identifier =
+        propertySet.value === null
+          ? propertySet.key
+          : `${propertySet.key}=${propertySet.value}`
+      cached = new RegExp(String.raw`\p{${identifier}}`, 'u')
+      unicodePropertyRegexCache.set(cacheKey, cached)
+    } catch {
+      // Deliberate: a property the RegExp constructor rejects is "unknown".
+      return null
+    }
+  }
+
+  return cached.test(String.fromCodePoint(codePoint))
+}
+
+/**
+ * Tests whether a literal-character matcher overlaps a class or set matcher.
+ *
+ * @param parameters - Literal matcher (`left`), class or set matcher (`right`)
+ *   and the evaluation context.
+ * @returns True when `right` contains the code point of `left`; false when it
+ *   does not, when the answer is unknown, or when `left` is not a literal.
+ */
+function doMatchersOverlap({
+  matcherContext,
+  right,
+  left,
+}: {
+  right: CharacterMatcherCharacterClass | CharacterMatcherCharacterSet
+  matcherContext: MatcherEvaluationContext
+  left: CharacterMatcher
+}): boolean {
+  // Left is always a literal character in reachable flows.
+  /* c8 ignore next 3 */
+  if (left.type !== 'character') {
+    return false
+  }
+
+  if (right.type === 'character-class') {
+    return matcherContainsCharacter({
+      characterClass: right.value,
+      codePoint: left.value,
+      matcherContext,
+    })
+  }
+
+  return (
+    characterSetContainsCodePoint({
+      characterSet: right.value,
+      codePoint: left.value,
+      matcherContext,
+    }) ?? false
+  )
+}
+
+/**
+ * Tests whether a single character class element matches a code point.
+ *
+ * @param parameters - Element, code point and evaluation context.
+ * @returns Match result, or `null` for unsupported or undecidable elements.
+ */
+function classElementContainsCodePoint({
+  matcherContext,
+  codePoint,
+  element,
+}: {
+  matcherContext: MatcherEvaluationContext
+  element: CharacterClassElement
+  codePoint: number
+}): boolean | null {
+  switch (element.type) {
+    case 'CharacterClassRange': {
+      return characterClassRangeContainsCodePoint({
+        range: element,
+        codePoint,
+      })
+    }
+    case 'CharacterSet': {
+      return characterSetContainsCodePoint({
+        characterSet: element,
+        matcherContext,
+        codePoint,
+      })
+    }
+    case 'Character': {
+      return element.value === codePoint
+    }
+    default: {
+      return null
+    }
+  }
+}
+
+/**
+ * Tests whether a character class matches a code point, honouring `negate`.
+ *
+ * @param parameters - Character class, code point and evaluation context.
+ * @returns Match result, or `null` for classes flagged with `unicodeSets`.
+ */
+function characterClassContainsCodePoint({
+  characterClass,
+  matcherContext,
+  codePoint,
+}: {
+  matcherContext: MatcherEvaluationContext
+  characterClass: CharacterClass
+  codePoint: number
+}): boolean | null {
+  if (characterClass.unicodeSets) {
+    return null
+  }
+
+  let isMatched = false
+
+  for (let element of characterClass.elements) {
+    // An undecidable element (`null`) is treated like a non-matching one.
+    if (
+      classElementContainsCodePoint({
+        matcherContext,
+        codePoint,
+        element,
+      })
+    ) {
+      isMatched = true
+      break
+    }
+  }
+
+  return characterClass.negate ? !isMatched : isMatched
+}
+
+/**
+ * Builds an identifier for a character set that ignores its `negate` flag.
+ *
+ * @param characterSet - Character set to identify.
+ * @returns `key:value` for property sets, the kind for the other supported
+ *   sets, or `null` when the set cannot be identified.
+ */
+function getCharacterSetIdentifier(characterSet: CharacterSet): string | null {
+  switch (characterSet.kind) {
+    case 'property': {
+      if (characterSet.strings) {
+        return null
+      }
+
+      return `${characterSet.key}:${characterSet.value ?? ''}`
+    }
+    case 'digit':
+    case 'space':
+    case 'word':
+    case 'any': {
+      return characterSet.kind
+    }
+    default: {
+      return null
+    }
+  }
+}
+
+/**
+ * Boolean wrapper around `characterClassContainsCodePoint`.
+ *
+ * @param parameters - Character class, code point and evaluation context.
+ * @returns The match result, with an unknown (`null`) answer mapped to false.
+ */
+function matcherContainsCharacter({
+  matcherContext,
+  characterClass,
+  codePoint,
+}: {
+  matcherContext: MatcherEvaluationContext
+  characterClass: CharacterClass
+  codePoint: number
+}): boolean {
+  return (
+    characterClassContainsCodePoint({
+      characterClass,
+      matcherContext,
+      codePoint,
+    }) ?? false
+  )
+}
+
+/**
+ * Tells whether a matcher is treated as a wildcard: an `any` character set,
+ * or a character class that passes `isComplementaryCharacterClass`.
+ *
+ * @param matcher - Class or set matcher to inspect.
+ * @returns True when the matcher is treated as a wildcard.
+ */
+function isWildcardMatcher(
+  matcher: CharacterMatcherCharacterClass | CharacterMatcherCharacterSet,
+): boolean {
+  if (matcher.type === 'character-set') {
+    return matcher.value.kind === 'any'
+  }
+
+  return isComplementaryCharacterClass(matcher.value)
+}
+
+/**
+ * Tests whether the `any` character set matches a code point.
+ *
+ * @param parameters - Code point and evaluation context.
+ * @returns True for every code point when `dotAll` is set, otherwise true for
+ *   everything except line terminators.
+ */
+function matchesAnyCharacterSet({
+  matcherContext,
+  codePoint,
+}: {
+  matcherContext: MatcherEvaluationContext
+  codePoint: number
+}): boolean {
+  if (matcherContext.dotAll) {
+    return true
+  }
+
+  return !isLineTerminator(codePoint)
+}
+
+/**
+ * Tests whether a code point counts as a word character: a digit, a lowercase
+ * letter, an uppercase letter, or an underscore.
+ *
+ * NOTE(review): `\w` is ASCII-only (`[A-Za-z0-9_]`); if the imported helpers
+ * accept non-ASCII letters and digits this reports extra overlaps - confirm
+ * the over-approximation is intended.
+ *
+ * @param codePoint - Code point to test.
+ * @returns True when the code point counts as a word character.
+ */
+function isWordCharacter(codePoint: number): boolean {
+  return (
+    isDigitCharacter(codePoint) ||
+    isLowercaseCharacter(codePoint) ||
+    isUppercaseCharacter(codePoint) ||
+    codePoint === UNDERSCORE_CODE_POINT
+  )
+}
+
+/**
+ * Tests whether a code point lies inside a range, bounds included.
+ *
+ * @param parameters - Range node and code point.
+ * @returns True when `min <= codePoint <= max`.
+ */
+function characterClassRangeContainsCodePoint({
+  codePoint,
+  range,
+}: {
+  range: CharacterClassRange
+  codePoint: number
+}): boolean {
+  return range.min.value <= codePoint && codePoint <= range.max.value
+}
+
+/**
+ * Tests whether a code point is U+000A, U+000D, U+2028 or U+2029.
+ *
+ * @param codePoint - Code point to test.
+ * @returns True for one of the four line terminators.
+ */
+function isLineTerminator(codePoint: number): boolean {
+  return (
+    codePoint === 0x0a ||
+    codePoint === 0x0d ||
+    codePoint === 0x2028 ||
+    codePoint === 0x2029
+  )
+}
+
+/**
+ * Tests whether a code point is matched by `\s`.
+ *
+ * @param codePoint - Code point to test.
+ * @returns True for whitespace characters.
+ */
+function isWhitespaceCharacter(codePoint: number): boolean {
+  return WHITESPACE_PATTERN.test(String.fromCodePoint(codePoint))
+}
+
+/**
+ * Inverts a match result when the node is negated.
+ *
+ * @param node - Node carrying an optional `negate` flag.
+ * @param result - Match result of the non-negated form.
+ * @returns `!result` when `node.negate` is truthy, otherwise `result`.
+ */
+function applyNegation(node: { negate?: boolean }, result: boolean): boolean {
+  return node.negate ? !result : result
+}
+
+const WHITESPACE_PATTERN = /\s/u
+const UNDERSCORE_CODE_POINT = '_'.codePointAt(0)!
+// Compiled Unicode property regexes, keyed by a string built from the
+// property key and value.
+let unicodePropertyRegexCache = new Map<string, RegExp>()
diff --git a/rules/sort-regexp/is-capturing-context.ts b/rules/sort-regexp/is-capturing-context.ts
new file mode 100644
index 000000000..09415c123
--- /dev/null
+++ b/rules/sort-regexp/is-capturing-context.ts
@@ -0,0 +1,22 @@
+import type {
+  CapturingGroup,
+  Alternative,
+  Pattern,
+  Group,
+} from '@eslint-community/regexpp/ast'
+
+/**
+ * Checks whether an alternative is nested inside a sortable capturing context.
+ *
+ * Despite the name, non-capturing groups and the top-level pattern qualify as
+ * well as capturing groups.
+ *
+ * @param node - Parent node of the alternative.
+ * @returns True when the parent supports alternative reordering.
+ */
+export function isCapturingContext(
+  node: Alternative['parent'],
+): node is CapturingGroup | Pattern | Group {
+  return (
+    node.type === 'CapturingGroup' ||
+    node.type === 'Group' ||
+    node.type === 'Pattern'
+  )
+}
diff --git a/rules/sort-regexp/types.ts b/rules/sort-regexp/types.ts
new file mode 100644
index 000000000..046ee4452
--- /dev/null
+++ b/rules/sort-regexp/types.ts
@@ -0,0 +1,78 @@
+import type { JSONSchema4 } from '@typescript-eslint/utils/json-schema'
+
+import type {
+  CustomGroupsOption,
+  CommonOptions,
+  GroupsOptions,
+  RegexOption,
+} from '../../types/common-options'
+
+import {
+  buildCustomGroupSelectorJsonSchema,
+  regexJsonSchema,
+} from '../../utils/common-json-schemas'
+
+/**
+ * Configuration options for the sort-regexp rule.
+ *
+ * Controls how alternation branches inside regular expression literals are
+ * sorted, while still supporting the shared Perfectionist sorting options and
+ * custom grouping capabilities.
+ */
+export type Options = [
+  Partial<
+    {
+      /**
+       * Custom groups used to arrange alternation branches based on alias names
+       * or pattern content.
+       *
+       * NOTE(review): type arguments may be missing on `CustomGroupsOption`
+       * and `GroupsOptions` - confirm against `types/common-options`.
+       */
+      customGroups: CustomGroupsOption
+
+      /** Describes the group ordering applied during sorting. */
+      groups: GroupsOptions
+
+      /**
+       * Determines whether named capturing group aliases (e.g.
+       * `(?<name>...)`) should be ignored during comparisons.
+       */
+      ignoreAlias: boolean
+    } & CommonOptions
+  >,
+]
+
+/** Configuration for a single custom group of alternation branches. */
+export interface SingleCustomGroup {
+  /** Regular expression pattern that matches the full branch text. */
+  elementValuePattern?: RegexOption
+
+  /**
+   * Regular expression pattern that matches the alias name (the `name` in
+   * `(?<name>...)`).
+   */
+  elementNamePattern?: RegexOption
+
+  /**
+   * Branch selector. `alias` targets named groups, `pattern` targets other
+   * branches.
+   */
+  selector?: Selector
+}
+
+/** Available selectors for alternation branches. */
+export type Selector = 'pattern' | 'alias'
+
+/** No modifiers are currently defined for regex branches. */
+export type Modifier = never
+
+/** Complete selector list used for validation and schema generation. */
+export let allSelectors: Selector[] = ['alias', 'pattern']
+
+/** No modifiers exist, but export an empty array for API consistency. */
+export let allModifiers: Modifier[] = []
+
+/** JSON schema describing a single custom group configuration. */
+export let singleCustomGroupJsonSchema: Record<string, JSONSchema4> = {
+  selector: buildCustomGroupSelectorJsonSchema(allSelectors),
+  elementValuePattern: regexJsonSchema,
+  elementNamePattern: regexJsonSchema,
+}
+
+/** Built-in group identifiers. Custom group names are also allowed at runtime.
*/ +export type Group = 'pattern' | 'unknown' | 'alias' | string diff --git a/test/rules/sort-regexp-shadowing.test.ts b/test/rules/sort-regexp-shadowing.test.ts new file mode 100644 index 000000000..eb32b560d --- /dev/null +++ b/test/rules/sort-regexp-shadowing.test.ts @@ -0,0 +1,997 @@ +import type { + CharacterUnicodePropertyCharacterSet, + CharacterClassElement, + CapturingGroup, + CharacterClass, + CharacterSet, + Alternative, + Element, + Group, +} from '@eslint-community/regexpp/ast' + +import { parseRegExpLiteral } from '@eslint-community/regexpp' +import { afterEach, describe, expect, it, vi } from 'vitest' + +import type { FirstCharacterPath } from '../../rules/sort-regexp/get-first-character-paths' + +import * as firstCharacterPathsModule from '../../rules/sort-regexp/get-first-character-paths' +import { hasShadowingAlternatives } from '../../rules/sort-regexp/has-shadowing-alternatives' + +let { getFirstCharacterPaths } = firstCharacterPathsModule + +function expectDotAllOverlapFor(codePoint: number): void { + let literal = parseRegExpLiteral(/ab|cd/u) + let wildcardClass = parseRegExpLiteral(/a/u).pattern.alternatives[0]! + .elements[0]! as CharacterClass + let dotLiteral = parseRegExpLiteral(/./u) + let dotSet = dotLiteral.pattern.alternatives[0]!.elements[0]! 
as CharacterSet + + wildcardClass.elements = [ + { + ...dotSet, + parent: wildcardClass, + } as CharacterClassElement, + ] + + let longerPath = { + matcher: { type: 'character', value: codePoint }, + requiresMore: true, + canMatchMore: true, + } as FirstCharacterPath + let shorterPath = { + matcher: { type: 'character-class', value: wildcardClass }, + requiresMore: false, + canMatchMore: false, + } as FirstCharacterPath + let spy = vi.spyOn(firstCharacterPathsModule, 'getFirstCharacterPaths') + + spy + .mockImplementationOnce(() => [longerPath]) + .mockImplementationOnce(() => [shorterPath]) + .mockImplementationOnce(() => [longerPath]) + .mockImplementationOnce(() => [shorterPath]) + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }), + ).toBeFalsy() + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: `${literal.flags.raw}s`, + }), + ).toBeTruthy() + + spy.mockRestore() +} + +function serialize(paths: FirstCharacterPath[]): Record[] { + return paths.map(path => { + if (path.matcher.type === 'character') { + return { + requiresMore: path.requiresMore, + value: path.matcher.value, + type: 'character', + } + } + + if (path.matcher.type === 'character-class') { + return { + requiresMore: path.requiresMore, + raw: path.matcher.value.raw, + type: 'character-class', + } + } + + return { + negate: + 'negate' in path.matcher.value ? 
path.matcher.value.negate : false, + requiresMore: path.requiresMore, + kind: path.matcher.value.kind, + type: 'character-set', + } + }) +} + +function analyzeShadowingWithPaths( + pathSets: FirstCharacterPath[][], + flags = 'u', +): boolean { + let literal = parseRegExpLiteral(/a|b/u) + let spy = vi.spyOn(firstCharacterPathsModule, 'getFirstCharacterPaths') + + for (let paths of pathSets) { + spy.mockImplementationOnce(() => paths) + } + + let result = hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags, + }) + + spy.mockRestore() + + return result +} + +function analyzeShadowing(pattern: RegExp): boolean { + let literal = parseRegExpLiteral(pattern) + return hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }) +} + +function getAlternative(pattern: RegExp, index = 0): Alternative { + let literal = parseRegExpLiteral(pattern) + return literal.pattern.alternatives[index]! +} + +function code(value: string): number { + return value.codePointAt(0)! 
+} + +describe('sort-regexp shadowing helpers', () => { + afterEach(() => { + vi.restoreAllMocks() + }) + + it('detects literal prefixes with trailing characters', () => { + let paths = getFirstCharacterPaths(getAlternative(/ab/u)) + + expect(serialize(paths)).toEqual([ + { + requiresMore: true, + type: 'character', + value: code('a'), + }, + ]) + }) + + it('expands optional quantifiers and assertions', () => { + let paths = getFirstCharacterPaths(getAlternative(/^a?b/u)) + + expect(serialize(paths)).toEqual([ + { + requiresMore: true, + type: 'character', + value: code('a'), + }, + { + requiresMore: false, + type: 'character', + value: code('b'), + }, + ]) + }) + + it('handles character classes and ranges', () => { + let paths = getFirstCharacterPaths(getAlternative(/[a-c]z/u)) + + expect(serialize(paths)).toEqual([ + { + type: 'character-class', + requiresMore: true, + raw: '[a-c]', + }, + ]) + }) + + it('tracks nested groups and alternatives', () => { + let paths = getFirstCharacterPaths(getAlternative(/(?:ab|cd)e/u)) + + expect(serialize(paths)).toEqual([ + { + requiresMore: true, + type: 'character', + value: code('a'), + }, + { + requiresMore: true, + type: 'character', + value: code('c'), + }, + ]) + }) + + it('captures character sets inside quantifiers', () => { + let paths = getFirstCharacterPaths(getAlternative(/\d+/u)) + + expect(serialize(paths)).toEqual([ + { + type: 'character-set', + requiresMore: false, + kind: 'digit', + negate: false, + }, + ]) + }) + + it('marks alternatives that can optionally consume additional characters', () => { + let paths = getFirstCharacterPaths( + getAlternative(/[0-9a-f]{1,6}[\t\n\f\r ]?/iu), + ) + + expect(paths).toEqual([ + expect.objectContaining({ + requiresMore: false, + canMatchMore: true, + }), + ]) + }) + + it('keeps wildcard alternatives as single-character matches', () => { + let paths = getFirstCharacterPaths(getAlternative(/[\s\S]/u)) + + expect(paths).toEqual([ + expect.objectContaining({ + canMatchMore: 
false, + requiresMore: false, + }), + ]) + }) + + it('counts required repetitions for bounded quantifiers', () => { + let paths = getFirstCharacterPaths(getAlternative(/a{2,4}/u)) + + expect(serialize(paths)).toEqual([ + { + requiresMore: true, + type: 'character', + value: code('a'), + }, + ]) + }) + + it('skips paths that start with backreferences', () => { + let literal = parseRegExpLiteral(/(?a)(?\kb)/u) + let secondGroup = literal.pattern.alternatives[0]! + .elements[1]! as CapturingGroup + + expect(getFirstCharacterPaths(secondGroup.alternatives[0]!)).toEqual([]) + }) + + it('ignores quantifiers that only repeat backreferences', () => { + let paths = getFirstCharacterPaths(getAlternative(/(?a)\k+/u)) + + expect(serialize(paths)).toEqual([]) + }) + + it('stops collecting when reaching the safety limit', () => { + let alternatives = Array.from({ length: 40 }, (_, index) => `x${index}`) + let pattern = new RegExp(`(?:${alternatives.join('|')})`, 'u') + let paths = getFirstCharacterPaths(getAlternative(pattern)) + + expect(paths).toEqual([]) + }) + + it('skips unicode-set aware character classes entirely', () => { + // eslint-disable-next-line regexp/no-useless-character-class + let paths = getFirstCharacterPaths(getAlternative(/[a]/v)) + + expect(paths).toEqual([]) + }) + + it('skips property escapes that can match multi-codepoint strings', () => { + let paths = getFirstCharacterPaths(getAlternative(/\p{RGI_Emoji}/v)) + + expect(paths).toEqual([]) + }) + + it('ignores quantifiers whose inner matcher cannot produce a prefix', () => { + let paths = getFirstCharacterPaths(getAlternative(/\p{RGI_Emoji}+/v)) + + expect(paths).toEqual([]) + }) + + it('bails out when a quantifier has an unknown minimum length', () => { + let paths = getFirstCharacterPaths( + getAlternative(/(?:(?a)\k)+/u), + ) + + expect(paths).toEqual([]) + }) + + it('handles quantifiers whose inner element already spans multiple characters', () => { + let paths = 
getFirstCharacterPaths(getAlternative(/(?:ab)+/u)) + + expect(serialize(paths)).toEqual([ + { + requiresMore: true, + type: 'character', + value: code('a'), + }, + ]) + }) + + it('does not reuse recursive groups when computing rest length', () => { + let literal = parseRegExpLiteral(/a(?b)/u) + let alternative = literal.pattern.alternatives[0]! + let group = alternative.elements[1]! as CapturingGroup + let recursiveReference = group as unknown as Element + + group.alternatives[0]!.elements = [recursiveReference] + + expect(getFirstCharacterPaths(alternative)).toEqual([]) + }) + + it('reuses cached alternative lengths when branches share the same node', () => { + let literal = parseRegExpLiteral(/(?:a)/u) + let alternative = literal.pattern.alternatives[0]! + let group = alternative.elements[0]! as Group + let [innerAlternative] = group.alternatives + + group.alternatives.push(innerAlternative!) + + expect(serialize(getFirstCharacterPaths(alternative))).toEqual([ + { + requiresMore: false, + type: 'character', + value: code('a'), + }, + { + requiresMore: false, + type: 'character', + value: code('a'), + }, + ]) + }) + + it('skips prefixes when trailing content depends on backreferences', () => { + let paths = getFirstCharacterPaths(getAlternative(/(?a)\k/u)) + + expect(paths).toEqual([]) + }) + + it('handles expression character classes when evaluating rest lengths', () => { + expect( + getFirstCharacterPaths( + getAlternative(/a[\p{Script=Greek}&&\p{Letter}]/v), + ), + ).toEqual([]) + }) + + it('continues quantifier analysis when the minimum length is unknown', () => { + let paths = getFirstCharacterPaths( + // eslint-disable-next-line regexp/optimal-quantifier-concatenation, regexp/no-potentially-useless-backreference + getAlternative(/(?a)?(?:(?a)|\k)+/u), + ) + + expect(paths).toEqual([]) + }) + + it('propagates null rest lengths when later elements mix literals and backreferences', () => { + expect( + getFirstCharacterPaths(getAlternative(/(?a)b\k/u)), + 
).toEqual([]) + }) + + it('saturates rest length computations when encountering multi-character tails', () => { + let paths = getFirstCharacterPaths(getAlternative(/a(?bc)d/u)) + + expect(serialize(paths)).toEqual([ + { + requiresMore: true, + type: 'character', + value: code('a'), + }, + ]) + }) + + it('avoids recursive alternative loops when computing suffix lengths', () => { + let literal = parseRegExpLiteral(/a/u) + let alternative = literal.pattern.alternatives[0]! + let group = { + alternatives: [alternative], + parent: alternative.parent, + type: 'Group', + raw: '', + } as unknown as Group + + alternative.elements.push(group) + + expect(getFirstCharacterPaths(alternative)).toEqual([]) + }) + + it('detects recursion in synthetic groups without relying on parser state', () => { + let alternative = { + elements: [] as Element[], + type: 'Alternative', + parent: null, + start: 0, + raw: '', + end: 0, + } as unknown as Alternative + let literal = { + parent: alternative, + type: 'Character', + value: code('a'), + start: 0, + raw: 'a', + end: 0, + } as Element + let group = { + alternatives: [alternative], + parent: alternative, + type: 'Group', + start: 0, + raw: '', + end: 0, + } as unknown as Group + + alternative.elements = [literal, group] + + expect(getFirstCharacterPaths(alternative)).toEqual([]) + }) +}) + +describe('hasShadowingAlternatives', () => { + it('detects overlapping newline classes', () => { + expect(analyzeShadowing(/\r\n|[\n\r\u{2028}\u{2029}]/u)).toBeTruthy() + }) + + it('detects optional newline sequences', () => { + // eslint-disable-next-line regexp/no-dupe-disjunctions + expect(analyzeShadowing(/\r?\n|[\n\r]/u)).toBeTruthy() + }) + + it('ignores wildcard-driven alternatives', () => { + expect( + analyzeShadowing(/(?specific|.+foo|.*bar|.?baz)/u), + ).toBeFalsy() + }) + + it('handles character classes with property escapes', () => { + // eslint-disable-next-line regexp/no-useless-character-class + 
expect(analyzeShadowing(/ab|[\p{ASCII}]/u)).toBeTruthy() + }) + + it('handles character classes with escape sets', () => { + // eslint-disable-next-line regexp/no-useless-character-class, regexp/prefer-w + expect(analyzeShadowing(/ab|[\w]/u)).toBeTruthy() + }) + + it('detects overlaps coming from script-aware property escapes inside classes', () => { + expect(analyzeShadowing(/Ωa|\p{Script=Greek}/u)).toBeTruthy() + }) + + it('detects negated character classes even when wrapped in groups', () => { + // eslint-disable-next-line regexp/no-useless-non-capturing-group + expect(analyzeShadowing(/(?:[^a])b|(?:[^a])/u)).toBeTruthy() + }) + + it('detects overlaps coming from ranges', () => { + expect(analyzeShadowing(/ab|[a-d]/u)).toBeTruthy() + }) + + it('detects negated character classes immediately', () => { + // eslint-disable-next-line regexp/no-dupe-disjunctions + expect(analyzeShadowing(/[^a]|b/u)).toBeTruthy() + }) + + it('falls back to raw comparison when needed', () => { + expect(analyzeShadowing(/ab|a/u)).toBeTruthy() + }) + + it('skips alternatives that cannot produce a deterministic prefix', () => { + // eslint-disable-next-line regexp/no-empty-alternative, regexp/no-dupe-disjunctions, regexp/no-useless-character-class + expect(analyzeShadowing(/(?|ab)|[\r]/u)).toBeFalsy() + }) + + it('handles unicode set enabled character classes', () => { + expect(analyzeShadowing(/ab|\p{Script=Greek}/v)).toBeFalsy() + }) + + it('ignores unicode properties that can expand to emoji strings', () => { + expect(analyzeShadowing(/\p{RGI_Emoji}|./v)).toBeFalsy() + }) + + it('detects overlaps coming from negated property escapes', () => { + expect(analyzeShadowing(/\P{Script=Greek}B|\P{Script=Greek}/u)).toBeTruthy() + }) + + it('detects overlaps when negated property escapes live inside classes', () => { + expect(analyzeShadowing(/Aa|\P{Script=Greek}/u)).toBeTruthy() + }) + + it('caches unicode property regexes between analyses', () => { + let propertyWithoutValue = /\p{ASCII}/u + 
let propertyWithValue = /\p{Script=Latin}/u + + expect(analyzeShadowing(propertyWithoutValue)).toBeFalsy() + expect(analyzeShadowing(propertyWithoutValue)).toBeFalsy() + + expect(analyzeShadowing(propertyWithValue)).toBeFalsy() + expect(analyzeShadowing(propertyWithValue)).toBeFalsy() + }) + + it('treats unsupported unicode properties as non-shadowing', () => { + let literal = parseRegExpLiteral(/\p{Script=Latin}/u) + let property = literal.pattern.alternatives[0]! + .elements[0]! as CharacterUnicodePropertyCharacterSet + + property.key = 'NotARealProperty' as typeof property.key + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }), + ).toBeFalsy() + }) + + it('skips invalid property escapes nested inside character classes', () => { + // eslint-disable-next-line regexp/no-useless-character-class + let literal = parseRegExpLiteral(/Aa|[\p{Script=Latin}]/u) + let characterClass = literal.pattern.alternatives[1]! + .elements[0]! 
as CharacterClass + let propertyElement = characterClass.elements.find( + element => element.type === 'CharacterSet', + ) as CharacterUnicodePropertyCharacterSet + + propertyElement.key = 'NotARealProperty' as typeof propertyElement.key + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }), + ).toBeFalsy() + }) + + it('detects overlaps when digit escapes appear inside classes', () => { + expect(analyzeShadowing(/1a|[\d_]/u)).toBeTruthy() + }) + + it('detects overlaps coming from space escapes inside classes', () => { + expect(analyzeShadowing(/\u{20}a|[\s_]/u)).toBeTruthy() + }) + + it(String.raw`detects overlaps caused by \w escapes inside classes`, () => { + expect(analyzeShadowing(/Aa|\w/u)).toBeTruthy() + }) + + it('detects underscore-prefixed literals shadowed by word character classes', () => { + expect(analyzeShadowing(/_a|\w/u)).toBeTruthy() + }) + + it('skips comparisons when the other alternative has no deterministic prefix', () => { + // eslint-disable-next-line no-useless-backreference, regexp/no-useless-backreference + expect(analyzeShadowing(/(?a)|\k/u)).toBeFalsy() + }) + + it('treats unknown character-set kinds as safe to ignore', () => { + // eslint-disable-next-line regexp/prefer-d, regexp/no-useless-character-class + let literal = parseRegExpLiteral(/1a|[\d]/u) + let characterClass = literal.pattern.alternatives[1]! + .elements[0]! 
as CharacterClass + let digitElement = characterClass.elements.find( + element => element.type === 'CharacterSet', + ) as CharacterSet + + digitElement.kind = 'unknown-kind' as CharacterSet['kind'] + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }), + ).toBeFalsy() + }) + + it('ignores unexpected elements inside character classes when checking overlaps', () => { + // eslint-disable-next-line regexp/prefer-d, regexp/no-useless-character-class + let literal = parseRegExpLiteral(/\dA|[\d]/u) + let characterClass = literal.pattern.alternatives[1]! + .elements[0]! as CharacterClass + + let mutableElements = + characterClass.elements as unknown as CharacterClassElement[] + mutableElements.unshift({ + type: 'UnknownElement', + } as unknown as CharacterClassElement) + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }), + ).toBeFalsy() + }) + + it('falls through class elements that never match the probed character', () => { + // eslint-disable-next-line regexp/no-useless-character-class + let literal = parseRegExpLiteral(/a|[b]a/u) + let characterClass = literal.pattern.alternatives[1]! + .elements[0]! as CharacterClass + + let mutableElements = + characterClass.elements as unknown as CharacterClassElement[] + mutableElements.unshift({ + type: 'UnknownElement', + } as unknown as CharacterClassElement) + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }), + ).toBeFalsy() + }) + + it('falls through unknown character class elements before evaluating overlaps', () => { + let literal = parseRegExpLiteral(/ab|cd/u) + // eslint-disable-next-line regexp/no-useless-character-class + let characterClass = parseRegExpLiteral(/[a]/u).pattern.alternatives[0]! + .elements[0]! 
as CharacterClass + + let mutableElements = + characterClass.elements as unknown as CharacterClassElement[] + mutableElements.unshift({ + type: 'UnknownElement', + } as unknown as CharacterClassElement) + + let spy = vi.spyOn(firstCharacterPathsModule, 'getFirstCharacterPaths') + + spy + .mockImplementationOnce(() => [ + { + matcher: { type: 'character', value: code('z') }, + requiresMore: true, + canMatchMore: true, + }, + ]) + .mockImplementationOnce(() => [ + { + matcher: { type: 'character-class', value: characterClass }, + requiresMore: false, + canMatchMore: false, + }, + ]) + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }), + ).toBeFalsy() + }) + + it('evaluates negated character classes during overlap detection with custom prefixes', () => { + let negatedClass = parseRegExpLiteral(/[^a]/u).pattern.alternatives[0]! + .elements[0]! as CharacterClass + + expect( + analyzeShadowingWithPaths([ + [ + { + matcher: { type: 'character', value: code('a') }, + requiresMore: true, + canMatchMore: true, + }, + ], + [ + { + matcher: { type: 'character-class', value: negatedClass }, + requiresMore: false, + canMatchMore: false, + }, + ], + ]), + ).toBeFalsy() + }) + + it('skips overlaps when character classes rely on string-based property escapes', () => { + let literal = parseRegExpLiteral(/ab|cd/u) + let emojiClassWrapper = parseRegExpLiteral(/[ab]/u) + let emojiClass = emojiClassWrapper.pattern.alternatives[0]! + .elements[0]! as CharacterClass + let emojiSet = parseRegExpLiteral(/\p{RGI_Emoji}/v).pattern.alternatives[0]! + .elements[0]! 
as CharacterSet + + emojiClass.unicodeSets = false + emojiClass.elements = [ + { + ...emojiSet, + parent: emojiClass, + strings: true, + } as CharacterClassElement, + ] + let spy = vi.spyOn(firstCharacterPathsModule, 'getFirstCharacterPaths') + + spy + .mockImplementationOnce(() => [ + { + matcher: { type: 'character', value: code('x') }, + requiresMore: true, + canMatchMore: true, + }, + ]) + .mockImplementationOnce(() => [ + { + matcher: { type: 'character-class', value: emojiClass }, + requiresMore: false, + canMatchMore: false, + }, + ]) + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }), + ).toBeFalsy() + }) + + it('skips overlaps when character sets rely on string-based property escapes', () => { + let literal = parseRegExpLiteral(/ab|cd/u) + let emojiSet = parseRegExpLiteral(/\p{RGI_Emoji}/v).pattern.alternatives[0]! + .elements[0]! as CharacterSet + let spy = vi.spyOn(firstCharacterPathsModule, 'getFirstCharacterPaths') + + spy + .mockImplementationOnce(() => [ + { + matcher: { type: 'character', value: code('a') }, + requiresMore: true, + canMatchMore: true, + }, + ]) + .mockImplementationOnce(() => [ + { + matcher: { type: 'character-set', value: emojiSet }, + requiresMore: false, + canMatchMore: false, + }, + ]) + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }), + ).toBeFalsy() + }) + + it('treats unicode-set based character classes as inconclusive overlaps', () => { + let literal = parseRegExpLiteral(/ab|cd/u) + // eslint-disable-next-line regexp/no-useless-character-class + let unicodeLiteral = parseRegExpLiteral(/[a]/v) + let unicodeClass = unicodeLiteral.pattern.alternatives[0]! + .elements[0]! 
as CharacterClass + let spy = vi.spyOn(firstCharacterPathsModule, 'getFirstCharacterPaths') + + spy + .mockImplementationOnce(() => [ + { + matcher: { type: 'character', value: code('x') }, + requiresMore: true, + canMatchMore: true, + }, + ]) + .mockImplementationOnce(() => [ + { + matcher: { type: 'character-class', value: unicodeClass }, + requiresMore: false, + canMatchMore: false, + }, + ]) + + expect( + hasShadowingAlternatives({ + alternatives: literal.pattern.alternatives, + flags: literal.flags.raw, + }), + ).toBeFalsy() + }) + + it('respects the dotAll flag when wildcard escapes live inside classes', () => { + let codePoints = [ + '\n'.codePointAt(0)!, + '\r'.codePointAt(0)!, + 0x2028, + 0x2029, + ] + + for (let codePoint of codePoints) { + expectDotAllOverlapFor(codePoint) + } + }) + + it('detects overlaps when wildcard matchers originate from character sets', () => { + let classLiteral = parseRegExpLiteral(/[a-z]b/u) + let classMatcher = classLiteral.pattern.alternatives[0]! + .elements[0]! as CharacterClass + let dotLiteral = parseRegExpLiteral(/./u) + let dotMatcher = dotLiteral.pattern.alternatives[0]! + .elements[0]! as CharacterSet + + expect( + analyzeShadowingWithPaths([ + [ + { + matcher: { type: 'character-class', value: classMatcher }, + requiresMore: true, + canMatchMore: true, + }, + ], + [ + { + matcher: { type: 'character-set', value: dotMatcher }, + requiresMore: false, + canMatchMore: false, + }, + ], + ]), + ).toBeTruthy() + }) + + it('detects overlaps when complementary property escapes form a wildcard class', () => { + let propertyWrapper = parseRegExpLiteral(/[a-z]/u) + let propertyClass = propertyWrapper.pattern.alternatives[0]! + .elements[0]! as CharacterClass + let asciiSet = parseRegExpLiteral(/\p{ASCII}/u).pattern.alternatives[0]! + .elements[0]! as CharacterSet + let nonAsciiSet = parseRegExpLiteral(/\P{ASCII}/u).pattern.alternatives[0]! + .elements[0]! 
as CharacterSet + + propertyClass.elements = [ + { + ...asciiSet, + parent: propertyClass, + } as CharacterClassElement, + { + ...nonAsciiSet, + parent: propertyClass, + } as CharacterClassElement, + ] + let classMatcher = parseRegExpLiteral(/[a-z]z/u).pattern.alternatives[0]! + .elements[0]! as CharacterClass + + expect( + analyzeShadowingWithPaths([ + [ + { + matcher: { type: 'character-class', value: classMatcher }, + requiresMore: true, + canMatchMore: true, + }, + ], + [ + { + matcher: { type: 'character-class', value: propertyClass }, + requiresMore: false, + canMatchMore: false, + }, + ], + ]), + ).toBeTruthy() + }) + + it('ignores string-based property escapes when checking wildcard classes', () => { + let emojiClassWrapper = parseRegExpLiteral(/[ab]/u) + let emojiClass = emojiClassWrapper.pattern.alternatives[0]! + .elements[0]! as CharacterClass + let emojiSet = parseRegExpLiteral(/\p{RGI_Emoji}/v).pattern.alternatives[0]! + .elements[0]! as CharacterSet + + emojiClass.unicodeSets = false + emojiClass.elements = [ + { + ...emojiSet, + parent: emojiClass, + strings: true, + } as CharacterClassElement, + ] + let classMatcher = parseRegExpLiteral(/[a-z]n/u).pattern.alternatives[0]! + .elements[0]! as CharacterClass + + expect( + analyzeShadowingWithPaths([ + [ + { + matcher: { type: 'character-class', value: classMatcher }, + requiresMore: true, + canMatchMore: true, + }, + ], + [ + { + matcher: { type: 'character-class', value: emojiClass }, + requiresMore: false, + canMatchMore: false, + }, + ], + ]), + ).toBeFalsy() + }) + + it('falls back gracefully when encountering unknown character set kinds', () => { + let patchedClass = parseRegExpLiteral(/[a-z]n/u).pattern.alternatives[0]! + .elements[0]! as CharacterClass + let wildcardClass = parseRegExpLiteral(/[\s\S]/u).pattern.alternatives[0]! + .elements[0]! 
as CharacterClass + + let mutableElements = + wildcardClass.elements as unknown as CharacterClassElement[] + + for (let index = 0; index < mutableElements.length; index++) { + let element = mutableElements[index]! + + if (element.type === 'CharacterSet') { + mutableElements[index] = { + ...element, + kind: 'custom-kind', + } as unknown as CharacterClassElement + } + } + + expect( + analyzeShadowingWithPaths([ + [ + { + matcher: { type: 'character-class', value: patchedClass }, + requiresMore: true, + canMatchMore: true, + }, + ], + [ + { + matcher: { type: 'character-class', value: wildcardClass }, + requiresMore: false, + canMatchMore: false, + }, + ], + ]), + ).toBeFalsy() + }) + + it('skips wildcard detection when a class is missing its element list', () => { + let patchedClass = parseRegExpLiteral(/[a-z]n/u).pattern.alternatives[0]! + .elements[0]! as CharacterClass + let wildcardClass = parseRegExpLiteral(/[\s\S]/u).pattern.alternatives[0]! + .elements[0]! as CharacterClass + + ;( + wildcardClass as unknown as { elements?: CharacterClass['elements'] } + ).elements = undefined + + expect( + analyzeShadowingWithPaths([ + [ + { + matcher: { type: 'character-class', value: patchedClass }, + requiresMore: true, + canMatchMore: true, + }, + ], + [ + { + matcher: { type: 'character-class', value: wildcardClass }, + requiresMore: false, + canMatchMore: false, + }, + ], + ]), + ).toBeFalsy() + }) + + it('detects overlaps when wildcard character sets follow multi-character classes', () => { + expect(analyzeShadowing(/[a-z]a|./u)).toBeTruthy() + }) + + it('detects overlaps when a wildcard branch could swallow escaped code points', () => { + let literal = parseRegExpLiteral(/\\(?:[0-9a-f]{1,6}[\t\n\f\r ]?|[\s\S])/iu) + let group = literal.pattern.alternatives[0]!.elements[1]! 
as CapturingGroup + + expect( + hasShadowingAlternatives({ + alternatives: group.alternatives, + flags: literal.flags.raw, + }), + ).toBeTruthy() + }) +}) diff --git a/test/rules/sort-regexp.test.ts b/test/rules/sort-regexp.test.ts new file mode 100644 index 000000000..8a38c2339 --- /dev/null +++ b/test/rules/sort-regexp.test.ts @@ -0,0 +1,3285 @@ +/* Cspell:ignore gimsu igmus yusmig ysumgi zyxabc Ωmega Δelta */ + +import { createRuleTester } from 'eslint-vitest-rule-tester' +import typescriptParser from '@typescript-eslint/parser' +import { describe, expect, it } from 'vitest' +import dedent from 'dedent' + +import { validateRuleJsonSchema } from '../utils/validate-rule-json-schema' +import { Alphabet } from '../../utils/alphabet' +import rule from '../../rules/sort-regexp' + +describe('sort-regexp', () => { + let { invalid, valid } = createRuleTester({ + parser: typescriptParser, + name: 'sort-regexp', + rule, + }) + + describe('alphabetical', () => { + let options = { + type: 'alphabetical', + order: 'asc', + } as const + + it('sorts regex flags', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'g', left: 'i' }, + }, + ], + output: dedent` + /pattern/gimsu + `, + code: dedent` + /pattern/igmus + `, + options: [options], + }) + }) + + it('keeps already sorted flags', async () => { + await valid({ + code: dedent` + /pattern/gim + `, + options: [options], + }) + }) + + it('ignores non-regex literals', async () => { + await valid({ + code: dedent` + const sample = 'not a regular expression'; + `, + options: [options], + }) + }) + + it('sorts various flag combinations', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 's', left: 'y' }, + }, + ], + output: dedent` + /test/gimsuy + `, + code: dedent` + /test/ysumgi + `, + options: [options], + }) + }) + + it('sorts single letter flags', async () => { + await invalid({ + errors: [ + { + messageId: 
'unexpectedRegExpOrder', + data: { right: 'i', left: 'm' }, + }, + ], + output: dedent` + /(abc)/im + `, + code: dedent` + /(abc)/mi + `, + options: [options], + }) + }) + + it('honors desc order for flags', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'i', left: 'g' }, + }, + ], + options: [ + { + ...options, + order: 'desc', + }, + ], + output: dedent` + /pattern/yusmig + `, + code: dedent` + /pattern/gimsuy + `, + }) + }) + + it('sorts characters in character class', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + ], + output: dedent` + /[axz]/ + `, + code: dedent` + /[zxa]/ + `, + options: [options], + }) + }) + + it('keeps escape sequences intact when sorting character classes', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + const re = /[,?.()[\]{}*\\\s#^+|$-]/g; + `), + code: dedent(String.raw` + const re = /[-[\]{}()*+?.,\\^$|#\s]/g; + `), + options: [options], + }) + }) + + it('sorts character classes with ranges', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '0-9', left: 'z' }, + }, + ], + output: dedent` + /[0-9a-fz]/ + `, + code: dedent` + /[z0-9a-f]/ + `, + options: [options], + }) + }) + + it('sorts mixed character class elements', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '0-9', left: 'z' }, + }, + ], + output: dedent` + /[0-9A-Za-fz]/ + `, + code: dedent` + /[z0-9a-fA-Z]/ + `, + options: [options], + }) + }) + + it('keeps already sorted character classes', async () => { + await valid({ + code: dedent` + /[0-9A-Za-z]/ + `, + options: [options], + }) + }) + + it('sorts character classes with special chars', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\d`, left: String.raw`\w` }, + 
messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[\d\w\s]/ + `), + code: dedent(String.raw` + /[\w\d\s]/ + `), + options: [options], + }) + }) + + it('sorts character classes with equivalent elements', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\x61`, left: 'a' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[\x61a]/ + `), + code: dedent(String.raw` + /[a\x61]/ + `), + options: [options], + }) + }) + + it('sorts character classes with ignoreCase disabled', async () => { + let customOptions = { + ...options, + ignoreCase: false, + } as const + + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[\x61a]/ + `), + code: dedent(String.raw` + /[a\x61]/ + `), + options: [customOptions], + }) + }) + + it('sorts character classes with descending order', async () => { + let customOptions = { + ...options, + order: 'desc', + } as const + + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[a\x61]/ + `), + code: dedent(String.raw` + /[\x61a]/ + `), + options: [customOptions], + }) + }) + + it('sorts character classes with standalone digits', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '1', left: '3' }, + }, + ], + output: dedent` + /[123]/ + `, + code: dedent` + /[312]/ + `, + options: [options], + }) + }) + + it('sorts character classes with uppercase letters', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'A', left: 'C' }, + }, + ], + output: dedent` + /[ABC]/ + `, + code: dedent` + /[CBA]/ + `, + options: [options], + }) + }) + + it('sorts character classes with astral characters', async () => { + await invalid({ + errors: [ + { + data: { + right: String.raw`\u{1F600}-\u{1F602}`, + left: String.raw`\u{1F603}`, + }, + messageId: 
'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[\u{1F600}-\u{1F602}\u{1F603}]/u + `), + code: dedent(String.raw` + /[\u{1F603}\u{1F600}-\u{1F602}]/u + `), + options: [options], + }) + }) + + it('sorts negated character classes', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + ], + output: dedent` + /[^axz]/ + `, + code: dedent` + /[^zxa]/ + `, + options: [options], + }) + }) + + it('honors desc order in character classes', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'z', left: 'a' }, + }, + ], + options: [ + { + ...options, + order: 'desc', + }, + ], + output: dedent` + /[za90]/ + `, + code: dedent` + /[az09]/ + `, + }) + }) + + it('sorts character class inside groups', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'c' }, + }, + ], + output: dedent` + /([abc]+)/ + `, + code: dedent` + /([cba]+)/ + `, + options: [options], + }) + }) + + it('sorts multiple character classes in regex', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + { + messageId: 'unexpectedRegExpOrder', + data: { right: '1', left: '3' }, + }, + ], + output: dedent` + /[axz].*[123]/ + `, + code: dedent` + /[zxa].*[321]/ + `, + options: [options], + }) + }) + + it('keeps single character in character class', async () => { + await valid({ + code: dedent` + /[a]/ + `, + options: [options], + }) + }) + + it('keeps empty character class', async () => { + await valid({ + code: dedent` + /[]/ + `, + options: [options], + }) + }) + + it('sorts regex literal in variable declaration', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'b', left: 'c' }, + }, + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'b' }, + }, + ], + output: 
dedent` + const re = /(a|b|c)/; + `, + code: dedent` + const re = /(c|b|a)/; + `, + options: [options], + }) + }) + + it('sorts regex literal inside function call', async () => { + await invalid({ + errors: [ + { + data: { right: 'apple', left: 'pear' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + test(/(apple|orange|pear)/); + `, + code: dedent` + test(/(pear|apple|orange)/); + `, + options: [options], + }) + }) + + it('keeps already sorted alternatives', async () => { + await valid({ + code: dedent` + /(aaa|bb|c)/gi + `, + options: [options], + }) + }) + + it('sorts plain alternations', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'aaa', left: 'c' }, + }, + ], + output: dedent` + /(aaa|bb|c)/ + `, + code: dedent` + /(c|aaa|bb)/ + `, + options: [options], + }) + }) + + it('sorts named capturing group alternatives with ignoreAlias false', async () => { + await invalid({ + errors: [ + { + data: { right: 'a: (?aaa)', left: 'b: bbb' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(?(?aaa)|bbb)/ + `, + code: dedent` + /(?bbb|(?aaa))/ + `, + options: [options], + }) + }) + + it('ignores alias names when ignoreAlias is true', async () => { + await invalid({ + errors: [ + { + data: { right: '(?a)', left: 'b' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + options: [ + { + ...options, + ignoreAlias: true, + }, + ], + output: dedent` + /(?(?a)|b)/ + `, + code: dedent` + /(?b|(?a))/ + `, + }) + }) + + it('respects custom groups by alias name', async () => { + await invalid({ + errors: [ + { + data: { + right: String.raw`digit: (?\d)`, + leftGroup: 'unknown', + rightGroup: 'digits', + left: 'other: z', + }, + messageId: 'unexpectedRegExpOrder', + }, + ], + options: [ + { + ...options, + customGroups: [ + { + elementNamePattern: '^digit$', + groupName: 'digits', + }, + ], + groups: ['digits', 'unknown'], + }, + ], + output: dedent` + /(?(?\d)|z)/ + `, + code: dedent` 
+ /(?z|(?\d))/ + `, + }) + }) + + it('skips sorting alternatives with shadowed numbers', async () => { + await valid({ + code: dedent` + /(20|1|10|2|3)/ + `, + options: [options], + }) + }) + + it('does not reorder alternatives when negated character class overlaps literal', async () => { + await valid({ + code: dedent` + /(?:\([^)]*\)|[^;])+/ + `, + options: [options], + }) + }) + + it('does not reorder @import alternatives with greedy negated class', async () => { + await valid({ + code: dedent(String.raw` + /@import(?:\s*url\([^)]*\).*?|[^;]*);/ + `), + options: [options], + }) + }) + + it('does not reorder alternatives when character class overlaps multi-character sequence', async () => { + await valid({ + code: dedent(String.raw` + /\r\n|[\r\n\u2028\u2029]/ + `), + options: [options], + }) + }) + + it('does not reorder alternatives when optional sequence overlaps character class', async () => { + await valid({ + code: dedent(String.raw` + /\r?\n|[\r\n]/ + `), + options: [options], + }) + }) + + it('sorts alternatives with special characters', async () => { + await invalid({ + errors: [ + { + data: { right: '!@#', left: 'abc' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: '&*', left: '$%^' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(!@#|&*|$%^|abc)/ + `, + code: dedent` + /(abc|!@#|$%^|&*)/ + `, + options: [options], + }) + }) + + it('skips sorting when empty alternative can shadow others', async () => { + await valid({ + code: dedent` + /(b||a)/ + `, + options: [options], + }) + }) + + it('skips sorting when alternatives contain unnamed capturing groups', async () => { + await valid({ + code: dedent` + const regex = /\\(.)|(['"])/gsu; + `, + options: [options], + }) + }) + + it('sorts alternatives with escaped characters', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'aa', left: 'bb' }, + }, + ], + output: dedent` + /(aa|bb|cc)/ + `, + code: dedent` + 
/(bb|aa|cc)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with character classes', async () => { + await valid({ + code: dedent` + /([0-9]|[A-Z]|[a-z])/ + `, + options: [options], + }) + }) + + it('sorts alternatives with quantifiers', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a+', left: 'b*' }, + }, + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a?', left: 'a+' }, + }, + ], + output: dedent` + /(a?|a{2,4}|a+|b*)/ + `, + code: dedent` + /(b*|a+|a?|a{2,4})/ + `, + options: [options], + }) + }) + + it('sorts alternatives with word boundaries', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\\bcat\\b`, left: String.raw`\\bdog\\b` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\bcat\\b|\\bdog\\b)/ + `, + code: dedent` + /(\\bdog\\b|\\bcat\\b)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with unicode characters', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'A', left: 'B' }, + }, + ], + output: dedent` + /(A|B|C)/u + `, + code: dedent` + /(B|A|C)/u + `, + options: [options], + }) + }) + + it('sorts alternatives with lookahead assertions', async () => { + await invalid({ + errors: [ + { + data: { right: '(?=a)', left: '(?=b)' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /((?=a)|(?=b)|(?=c))/ + `, + code: dedent` + /((?=b)|(?=a)|(?=c))/ + `, + options: [options], + }) + }) + + it('sorts alternatives with negative lookahead', async () => { + await invalid({ + errors: [ + { + data: { right: '(?!a)', left: '(?!b)' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /((?!a)|(?!b)|(?!c))/ + `, + code: dedent` + /((?!b)|(?!a)|(?!c))/ + `, + options: [options], + }) + }) + + it('sorts alternatives with lookbehind assertions', async () => { + await invalid({ + errors: [ + { + data: { right: '(?<=a)', left: 
'(?<=b)' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /((?<=a)|(?<=b)|(?<=c))/ + `, + code: dedent` + /((?<=b)|(?<=a)|(?<=c))/ + `, + options: [options], + }) + }) + + it('sorts alternatives with mixed metacharacters', async () => { + await invalid({ + errors: [ + { + data: { right: '^start', left: 'end$' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(^start|end$|middle)/ + `, + code: dedent` + /(end$|^start|middle)/ + `, + options: [options], + }) + }) + + it('skips alternatives with dot wildcard to avoid shadowing', async () => { + await valid({ + code: dedent` + /(specific|.+|.*|.?)/ + `, + options: [options], + }) + }) + + it('keeps escape alternatives before wildcard fallback', async () => { + await valid({ + code: dedent(String.raw` + const UNESCAPE = /\\([0-9A-Fa-f]{1,6}[ \f\n\r\t]?|[\s\S])/g; + `), + options: [options], + }) + }) + + it('skips sorting when alternatives are unnamed capturing groups', async () => { + await valid({ + code: dedent` + /((c|d)|(a|b)|(e|f))/ + `, + options: [options], + }) + }) + + it('sorts complex nested groups', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'b' }, + }, + ], + output: dedent` + /(a|b|c)/ + `, + code: dedent` + /(b|a|c)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with flags preserved', async () => { + await invalid({ + errors: [ + { + data: { left: 'BANANA', right: 'APPLE' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(APPLE|BANANA|CHERRY)/gim + `, + code: dedent` + /(BANANA|APPLE|CHERRY)/gim + `, + options: [options], + }) + }) + + it('sorts many alternatives', async () => { + await invalid({ + errors: [ + { + data: { right: 'alpha', left: 'beta' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'epsilon', left: 'gamma' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'eta', left: 'zeta' }, + messageId: 
'unexpectedRegExpOrder', + }, + { + data: { right: 'iota', left: 'theta' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'omega', left: 'xi' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'phi', left: 'tau' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(alpha|beta|delta|epsilon|eta|gamma|iota|kappa|lambda|mu|nu|omega|phi|pi|psi|rho|sigma|tau|theta|xi|zeta)/ + `, + code: dedent` + /(beta|alpha|delta|gamma|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omega|pi|rho|sigma|tau|phi|psi)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with backreferences', async () => { + await valid({ + code: dedent` + /(a+|b+|c+)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with named backreferences', async () => { + await valid({ + code: dedent` + /(aa|bb|cc)/ + `, + options: [options], + }) + }) + + it('sorts alternatives inside single group without affecting backreference', async () => { + await invalid({ + errors: [ + { + data: { right: 'cat', left: 'dog' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /(cat|dog)\s\1/ + `), + code: dedent(String.raw` + /(dog|cat)\s\1/ + `), + options: [options], + }) + }) + + it('does not sort when multiple groups exist with backreferences', async () => { + await valid({ + code: dedent(String.raw` + /(c)(b)(a)\1\2\3/ + `), + options: [options], + }) + }) + + it('sorts alternatives with named group backreferences', async () => { + await invalid({ + errors: [ + { + data: { right: 'pet: cat', left: 'pet: dog' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /(?cat|dog)\s\k/ + `), + code: dedent(String.raw` + /(?dog|cat)\s\k/ + `), + options: [options], + }) + }) + + it('sorts alternatives with non-capturing groups', async () => { + await invalid({ + errors: [ + { + data: { right: '(?:aaa)', left: '(?:bbb)' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + 
/((?:aaa)|(?:bbb)|(?:ccc))/ + `, + code: dedent` + /((?:bbb)|(?:aaa)|(?:ccc))/ + `, + options: [options], + }) + }) + + it('sorts alternatives with control characters', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\\cA`, left: String.raw`\\cB` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\cA|\\cB|\\cC)/ + `, + code: dedent` + /(\\cB|\\cA|\\cC)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with octal escapes', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\\101`, left: String.raw`\\102` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\101|\\102|\\103)/ + `, + code: dedent` + /(\\102|\\101|\\103)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with hex escapes', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\\x41`, left: String.raw`\\x42` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\x41|\\x42|\\x43)/ + `, + code: dedent` + /(\\x42|\\x41|\\x43)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with whitespace characters', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'b' }, + }, + ], + output: dedent` + /(a|b|c)/ + `, + code: dedent` + /(b|a|c)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with digit and word shortcuts', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\\d`, left: String.raw`\\w` }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: String.raw`\\D`, left: String.raw`\\W` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\d|\\D|\\s|\\S|\\w|\\W)/ + `, + code: dedent` + /(\\w|\\d|\\s|\\W|\\D|\\S)/ + `, + options: [options], + }) + }) + + it('sorts alternatives using alias custom groups', async () => { + let customOptions = { + ...options, + customGroups: [ + { + elementNamePattern: 
'^alpha$', + groupName: 'alpha-group', + selector: 'alias', + }, + ], + groups: ['alpha-group', 'pattern'], + } as const + + await invalid({ + errors: [ + { + data: { + right: 'alpha: (?aaa)', + left: 'beta: (?bbb)', + }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(?aaa)|(?bbb)/ + `, + code: dedent` + /(?bbb)|(?aaa)/ + `, + options: [customOptions], + }) + }) + + it('sorts alternatives with case-insensitive flag', async () => { + await invalid({ + errors: [ + { + data: { left: 'banana', right: 'apple' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(apple|banana|cherry)/i + `, + code: dedent` + /(banana|apple|cherry)/i + `, + options: [options], + }) + }) + + it('sorts alternatives with multiline flag', async () => { + await invalid({ + errors: [ + { + data: { right: '^alpha', left: '^beta' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(^alpha|^beta|^gamma)/m + `, + code: dedent` + /(^beta|^alpha|^gamma)/m + `, + options: [options], + }) + }) + + it('sorts alternatives with sticky flag', async () => { + await invalid({ + errors: [ + { + data: { right: 'aaa', left: 'bbb' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(aaa|bbb|ccc)/y + `, + code: dedent` + /(bbb|aaa|ccc)/y + `, + options: [options], + }) + }) + + it('sorts alternatives with dotAll flag', async () => { + await invalid({ + errors: [ + { + data: { right: 'a.b', left: 'c.d' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(a.b|c.d|e.f)/s + `, + code: dedent` + /(c.d|a.b|e.f)/s + `, + options: [options], + }) + }) + + it('keeps sorting with one alternative', async () => { + await valid({ + code: dedent` + /(onlyOne)/ + `, + options: [options], + }) + }) + + it('keeps sorting with no alternatives', async () => { + await valid({ + code: dedent` + /noAlternatives/ + `, + options: [options], + }) + }) + + it('keeps already sorted complex regex', async () => { + await valid({ + code: 
dedent` + /(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}|example\\.com|localhost)/ + `, + options: [options], + }) + }) + + it('works with complex cases', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'g', left: 'i' }, + }, + { + data: { right: '0-9', left: 'a-z' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'aaa', left: 'bb' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /[0-9a-z].(aaa|bb|c)/gi + `, + code: dedent` + /[a-z0-9].(bb|aaa|c)/ig + `, + }) + }) + + it('does not sort alternatives when one is a prefix of another', async () => { + await valid({ + code: dedent` + /(ab|a)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with common prefix but no prefix relationship', async () => { + await invalid({ + errors: [{ messageId: 'unexpectedRegExpOrder' }], + output: dedent` + /(abc|abd)/ + `, + code: dedent` + /(abd|abc)/ + `, + options: [options], + }) + }) + + it('keeps hyphen at the end when it is a literal character', async () => { + await valid({ + code: dedent` + /[abc-]/ + `, + options: [options], + }) + }) + + it('keeps hyphen at the beginning when it is a literal character', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: '-' }, + }, + ], + output: dedent` + /[abc-]/ + `, + code: dedent` + /[-abc]/ + `, + options: [options], + }) + }) + + it('does not move hyphen from edge to middle', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + ], + output: dedent` + /[axz-]/ + `, + code: dedent` + /[zxa-]/ + `, + options: [options], + }) + }) + + it('sorts character class with escaped hyphen normally', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + ], + output: dedent(String.raw` + /[axz\-]/ + `), + code: dedent(String.raw` + 
/[z\-xa]/ + `), + options: [options], + }) + }) + + it('does not create accidental range from literal hyphen', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + ], + output: dedent` + /[az-]/ + `, + code: dedent` + /[za-]/ + `, + options: [options], + }) + }) + + it('keeps literal hyphen last when mixed with character sets', async () => { + await valid({ + code: dedent(String.raw` + /[\w$-]/ + `), + options: [options], + }) + }) + + it('preserves range when hyphen is part of range', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '0-9', left: 'z' }, + }, + ], + output: dedent` + /[0-9a-fz]/ + `, + code: dedent` + /[z0-9a-f]/ + `, + options: [options], + }) + }) + + it('does not sort character classes with v flag (set operations)', async () => { + await valid({ + code: dedent(String.raw` + /[za]/v + `), + options: [options], + }) + }) + + it('does not sort character classes with v flag to avoid breaking set operations', async () => { + await valid({ + code: dedent` + /[zyxabc]/v + `, + options: [options], + }) + }) + + it('still sorts flags even with v flag present', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'g', left: 'v' }, + }, + ], + output: dedent` + /pattern/gv + `, + code: dedent` + /pattern/vg + `, + options: [options], + }) + }) + }) + + describe('natural', () => { + let options = { + type: 'natural', + order: 'asc', + } as const + + it('sorts regex flags', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'g', left: 'i' }, + }, + ], + output: dedent` + /pattern/gimsu + `, + code: dedent` + /pattern/igmus + `, + options: [options], + }) + }) + + it('keeps already sorted flags', async () => { + await valid({ + code: dedent` + /pattern/gim + `, + options: [options], + }) + }) + + it('ignores non-regex literals', 
async () => { + await valid({ + code: dedent` + const sample = 'not a regular expression'; + `, + options: [options], + }) + }) + + it('sorts various flag combinations', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 's', left: 'y' }, + }, + ], + output: dedent` + /test/gimsuy + `, + code: dedent` + /test/ysumgi + `, + options: [options], + }) + }) + + it('sorts single letter flags', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'i', left: 'm' }, + }, + ], + output: dedent` + /(abc)/im + `, + code: dedent` + /(abc)/mi + `, + options: [options], + }) + }) + + it('honors desc order for flags', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'i', left: 'g' }, + }, + ], + options: [ + { + ...options, + order: 'desc', + }, + ], + output: dedent` + /pattern/yusmig + `, + code: dedent` + /pattern/gimsuy + `, + }) + }) + + it('sorts characters in character class', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + ], + output: dedent` + /[axz]/ + `, + code: dedent` + /[zxa]/ + `, + options: [options], + }) + }) + + it('sorts character classes with ranges', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '0-9', left: 'z' }, + }, + ], + output: dedent` + /[0-9a-fz]/ + `, + code: dedent` + /[z0-9a-f]/ + `, + options: [options], + }) + }) + + it('sorts mixed character class elements', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '0-9', left: 'z' }, + }, + ], + output: dedent` + /[0-9A-Za-fz]/ + `, + code: dedent` + /[z0-9a-fA-Z]/ + `, + options: [options], + }) + }) + + it('keeps already sorted character classes', async () => { + await valid({ + code: dedent` + /[0-9A-Za-z]/ + `, + options: [options], + }) + }) + + it('sorts 
character classes with special chars', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\d`, left: String.raw`\w` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[\d\w\s]/ + `), + code: dedent(String.raw` + /[\w\d\s]/ + `), + options: [options], + }) + }) + + it('sorts character classes with equivalent elements', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[\x61a]/ + `), + code: dedent(String.raw` + /[a\x61]/ + `), + options: [options], + }) + }) + + it('sorts character classes with ignoreCase disabled', async () => { + let customOptions = { + ...options, + ignoreCase: false, + } as const + + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[\x61a]/ + `), + code: dedent(String.raw` + /[a\x61]/ + `), + options: [customOptions], + }) + }) + + it('sorts character classes', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '1', left: '3' }, + }, + ], + output: dedent` + /[123]/ + `, + code: dedent` + /[312]/ + `, + options: [options], + }) + }) + + it('sorts character classes with descending order', async () => { + let customOptions = { + ...options, + order: 'desc', + } as const + + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[a\x61]/ + `), + code: dedent(String.raw` + /[\x61a]/ + `), + options: [customOptions], + }) + }) + + it('sorts character classes with standalone digits', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '1', left: '3' }, + }, + ], + output: dedent` + /[123]/ + `, + code: dedent` + /[312]/ + `, + options: [options], + }) + }) + + it('sorts character classes with uppercase letters', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + 
data: { right: 'A', left: 'C' }, + }, + ], + output: dedent` + /[ABC]/ + `, + code: dedent` + /[CBA]/ + `, + options: [options], + }) + }) + + it('sorts character classes with astral characters', async () => { + await invalid({ + errors: [ + { + data: { + right: String.raw`\u{1F600}-\u{1F602}`, + left: String.raw`\u{1F603}`, + }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[\u{1F600}-\u{1F602}\u{1F603}]/u + `), + code: dedent(String.raw` + /[\u{1F603}\u{1F600}-\u{1F602}]/u + `), + options: [options], + }) + }) + + it('sorts negated character classes', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + ], + output: dedent` + /[^axz]/ + `, + code: dedent` + /[^zxa]/ + `, + options: [options], + }) + }) + + it('honors desc order in character classes', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'z', left: 'a' }, + }, + ], + options: [ + { + ...options, + order: 'desc', + }, + ], + output: dedent` + /[za90]/ + `, + code: dedent` + /[az09]/ + `, + }) + }) + + it('sorts character class inside groups', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'c' }, + }, + ], + output: dedent` + /([abc]+)/ + `, + code: dedent` + /([cba]+)/ + `, + options: [options], + }) + }) + + it('sorts multiple character classes in regex', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + { + messageId: 'unexpectedRegExpOrder', + data: { right: '1', left: '3' }, + }, + ], + output: dedent` + /[axz].*[123]/ + `, + code: dedent` + /[zxa].*[321]/ + `, + options: [options], + }) + }) + + it('keeps single character in character class', async () => { + await valid({ + code: dedent` + /[a]/ + `, + options: [options], + }) + }) + + it('keeps empty character class', async () => { + await 
valid({ + code: dedent` + /[]/ + `, + options: [options], + }) + }) + + it('sorts regex literal in variable declaration', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'b', left: 'c' }, + }, + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'b' }, + }, + ], + output: dedent` + const re = /(a|b|c)/; + `, + code: dedent` + const re = /(c|b|a)/; + `, + options: [options], + }) + }) + + it('sorts regex literal inside function call', async () => { + await invalid({ + errors: [ + { + data: { right: 'apple', left: 'pear' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + test(/(apple|orange|pear)/); + `, + code: dedent` + test(/(pear|apple|orange)/); + `, + options: [options], + }) + }) + + it('keeps already sorted alternatives', async () => { + await valid({ + code: dedent` + /(aaa|bb|c)/gi + `, + options: [options], + }) + }) + + it('sorts plain alternations', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'aaa', left: 'c' }, + }, + ], + output: dedent` + /(aaa|bb|c)/ + `, + code: dedent` + /(c|aaa|bb)/ + `, + options: [options], + }) + }) + + it('sorts named capturing group alternatives with ignoreAlias false', async () => { + await invalid({ + errors: [ + { + data: { right: 'a: (?aaa)', left: 'b: bbb' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(?(?aaa)|bbb)/ + `, + code: dedent` + /(?bbb|(?aaa))/ + `, + options: [options], + }) + }) + + it('ignores alias names when ignoreAlias is true', async () => { + await invalid({ + errors: [ + { + data: { right: '(?a)', left: 'b' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + options: [ + { + ...options, + ignoreAlias: true, + }, + ], + output: dedent` + /(?(?a)|b)/ + `, + code: dedent` + /(?b|(?a))/ + `, + }) + }) + + it('respects custom groups by alias name', async () => { + await invalid({ + errors: [ + { + data: { + right: String.raw`digit: 
(?\d)`, + leftGroup: 'unknown', + rightGroup: 'digits', + left: 'other: z', + }, + messageId: 'unexpectedRegExpOrder', + }, + ], + options: [ + { + ...options, + customGroups: [ + { + elementNamePattern: '^digit$', + groupName: 'digits', + }, + ], + groups: ['digits', 'unknown'], + }, + ], + output: dedent` + /(?(?\d)|z)/ + `, + code: dedent` + /(?z|(?\d))/ + `, + }) + }) + + it('skips sorting alternatives with shadowed numbers', async () => { + await valid({ + code: dedent` + /(20|1|10|2|3)/ + `, + options: [options], + }) + }) + + it('does not reorder alternatives when negated character class overlaps literal', async () => { + await valid({ + code: dedent` + /(?:\([^)]*\)|[^;])+/ + `, + options: [options], + }) + }) + + it('does not reorder @import alternatives with greedy negated class', async () => { + await valid({ + code: dedent(String.raw` + /@import(?:\s*url\([^)]*\).*?|[^;]*);/ + `), + options: [options], + }) + }) + + it('does not reorder alternatives when character class overlaps multi-character sequence', async () => { + await valid({ + code: dedent(String.raw` + /\r\n|[\r\n\u2028\u2029]/ + `), + options: [options], + }) + }) + + it('does not reorder alternatives when optional sequence overlaps character class', async () => { + await valid({ + code: dedent(String.raw` + /\r?\n|[\r\n]/ + `), + options: [options], + }) + }) + + it('sorts alternatives with special characters', async () => { + await invalid({ + errors: [ + { + data: { right: '!@#', left: 'abc' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(!@#|$%^|&*|abc)/ + `, + code: dedent` + /(abc|!@#|$%^|&*)/ + `, + options: [options], + }) + }) + + it('skips sorting when empty alternative can shadow others', async () => { + await valid({ + code: dedent` + /(b||a)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with escaped characters', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'aa', left: 'bb' }, + 
}, + ], + output: dedent` + /(aa|bb|cc)/ + `, + code: dedent` + /(bb|aa|cc)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with character classes', async () => { + await valid({ + code: dedent` + /([0-9]|[A-Z]|[a-z])/ + `, + options: [options], + }) + }) + + it('sorts alternatives with quantifiers', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a+', left: 'b*' }, + }, + ], + output: dedent` + /(a+|a?|a{2,4}|b*)/ + `, + code: dedent` + /(b*|a+|a?|a{2,4})/ + `, + options: [options], + }) + }) + + it('sorts alternatives with word boundaries', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\\bcat\\b`, left: String.raw`\\bdog\\b` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\bcat\\b|\\bdog\\b)/ + `, + code: dedent` + /(\\bdog\\b|\\bcat\\b)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with unicode characters', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'A', left: 'B' }, + }, + ], + output: dedent` + /(A|B|C)/u + `, + code: dedent` + /(B|A|C)/u + `, + options: [options], + }) + }) + + it('sorts alternatives with lookahead assertions', async () => { + await invalid({ + errors: [ + { + data: { right: '(?=a)', left: '(?=b)' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /((?=a)|(?=b)|(?=c))/ + `, + code: dedent` + /((?=b)|(?=a)|(?=c))/ + `, + options: [options], + }) + }) + + it('sorts alternatives with negative lookahead', async () => { + await invalid({ + errors: [ + { + data: { right: '(?!a)', left: '(?!b)' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /((?!a)|(?!b)|(?!c))/ + `, + code: dedent` + /((?!b)|(?!a)|(?!c))/ + `, + options: [options], + }) + }) + + it('sorts alternatives with lookbehind assertions', async () => { + await invalid({ + errors: [ + { + data: { right: '(?<=a)', left: '(?<=b)' }, + messageId: 
'unexpectedRegExpOrder', + }, + ], + output: dedent` + /((?<=a)|(?<=b)|(?<=c))/ + `, + code: dedent` + /((?<=b)|(?<=a)|(?<=c))/ + `, + options: [options], + }) + }) + + it('sorts alternatives with mixed metacharacters', async () => { + await invalid({ + errors: [ + { + data: { right: '^start', left: 'end$' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(^start|end$|middle)/ + `, + code: dedent` + /(end$|^start|middle)/ + `, + options: [options], + }) + }) + + it('skips alternatives with dot wildcard to avoid shadowing', async () => { + await valid({ + code: dedent` + /(specific|.+|.*|.?)/ + `, + options: [options], + }) + }) + + it('keeps escape alternatives before wildcard fallback', async () => { + await valid({ + code: dedent(String.raw` + const UNESCAPE = /\\([0-9A-Fa-f]{1,6}[ \f\n\r\t]?|[\s\S])/g; + `), + options: [options], + }) + }) + + it('skips sorting unnamed capturing group alternatives in secondary suite', async () => { + await valid({ + code: dedent` + /((c|d)|(a|b)|(e|f))/ + `, + options: [options], + }) + }) + + it('sorts complex nested groups', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'b' }, + }, + ], + output: dedent` + /(a|b|c)/ + `, + code: dedent` + /(b|a|c)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with flags preserved', async () => { + await invalid({ + errors: [ + { + data: { left: 'BANANA', right: 'APPLE' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(APPLE|BANANA|CHERRY)/gim + `, + code: dedent` + /(BANANA|APPLE|CHERRY)/gim + `, + options: [options], + }) + }) + + it('sorts many alternatives', async () => { + await invalid({ + errors: [ + { + data: { right: 'alpha', left: 'beta' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'epsilon', left: 'gamma' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'eta', left: 'zeta' }, + messageId: 'unexpectedRegExpOrder', + 
}, + { + data: { right: 'iota', left: 'theta' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'omega', left: 'xi' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'phi', left: 'tau' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(alpha|beta|delta|epsilon|eta|gamma|iota|kappa|lambda|mu|nu|omega|phi|pi|psi|rho|sigma|tau|theta|xi|zeta)/ + `, + code: dedent` + /(beta|alpha|delta|gamma|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omega|pi|rho|sigma|tau|phi|psi)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with backreferences', async () => { + await valid({ + code: dedent` + /(a+|b+|c+)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with named backreferences', async () => { + await valid({ + code: dedent` + /(aa|bb|cc)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with non-capturing groups', async () => { + await invalid({ + errors: [ + { + data: { right: '(?:aaa)', left: '(?:bbb)' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /((?:aaa)|(?:bbb)|(?:ccc))/ + `, + code: dedent` + /((?:bbb)|(?:aaa)|(?:ccc))/ + `, + options: [options], + }) + }) + + it('sorts alternatives with control characters', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\\cA`, left: String.raw`\\cB` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\cA|\\cB|\\cC)/ + `, + code: dedent` + /(\\cB|\\cA|\\cC)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with octal escapes', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\\101`, left: String.raw`\\102` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\101|\\102|\\103)/ + `, + code: dedent` + /(\\102|\\101|\\103)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with hex escapes', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\\x41`, left: 
String.raw`\\x42` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\x41|\\x42|\\x43)/ + `, + code: dedent` + /(\\x42|\\x41|\\x43)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with whitespace characters', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'b' }, + }, + ], + output: dedent` + /(a|b|c)/ + `, + code: dedent` + /(b|a|c)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with digit and word shortcuts', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\\d`, left: String.raw`\\w` }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: String.raw`\\D`, left: String.raw`\\W` }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\d|\\D|\\s|\\S|\\w|\\W)/ + `, + code: dedent` + /(\\w|\\d|\\s|\\W|\\D|\\S)/ + `, + options: [options], + }) + }) + + it('sorts alternatives using alias custom groups', async () => { + let customOptions = { + ...options, + customGroups: [ + { + elementNamePattern: '^alpha$', + groupName: 'alpha-group', + selector: 'alias', + }, + ], + groups: ['alpha-group', 'pattern'], + } as const + + await invalid({ + errors: [ + { + data: { + right: 'alpha: (?aaa)', + left: 'beta: (?bbb)', + }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(?aaa)|(?bbb)/ + `, + code: dedent` + /(?bbb)|(?aaa)/ + `, + options: [customOptions], + }) + }) + + it('sorts alternatives with case-insensitive flag', async () => { + await invalid({ + errors: [ + { + data: { left: 'banana', right: 'apple' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(apple|banana|cherry)/i + `, + code: dedent` + /(banana|apple|cherry)/i + `, + options: [options], + }) + }) + + it('sorts alternatives with multiline flag', async () => { + await invalid({ + errors: [ + { + data: { right: '^alpha', left: '^beta' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: 
dedent` + /(^alpha|^beta|^gamma)/m + `, + code: dedent` + /(^beta|^alpha|^gamma)/m + `, + options: [options], + }) + }) + + it('sorts alternatives with sticky flag', async () => { + await invalid({ + errors: [ + { + data: { right: 'aaa', left: 'bbb' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(aaa|bbb|ccc)/y + `, + code: dedent` + /(bbb|aaa|ccc)/y + `, + options: [options], + }) + }) + + it('sorts alternatives with dotAll flag', async () => { + await invalid({ + errors: [ + { + data: { right: 'a.b', left: 'c.d' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(a.b|c.d|e.f)/s + `, + code: dedent` + /(c.d|a.b|e.f)/s + `, + options: [options], + }) + }) + + it('keeps sorting with one alternative', async () => { + await valid({ + code: dedent` + /(onlyOne)/ + `, + options: [options], + }) + }) + + it('keeps sorting with no alternatives', async () => { + await valid({ + code: dedent` + /noAlternatives/ + `, + options: [options], + }) + }) + + it('keeps already sorted complex regex', async () => { + await valid({ + code: dedent` + /(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}|example\\.com|localhost)/ + `, + options: [options], + }) + }) + + it('works with complex cases', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'g', left: 'i' }, + }, + { + data: { right: '0-9', left: 'a-z' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'aaa', left: 'bb' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /[0-9a-z].(aaa|bb|c)/gi + `, + code: dedent` + /[a-z0-9].(bb|aaa|c)/ig + `, + }) + }) + + it('does not sort alternatives when one is a prefix of another', async () => { + await valid({ + code: dedent` + /(ab|a)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with common prefix but no prefix relationship', async () => { + await invalid({ + errors: [{ messageId: 'unexpectedRegExpOrder' }], + output: dedent` + /(abc|abd)/ + 
`, + code: dedent` + /(abd|abc)/ + `, + options: [options], + }) + }) + + it('keeps hyphen at the end when it is a literal character', async () => { + await valid({ + code: dedent` + /[abc-]/ + `, + options: [options], + }) + }) + + it('keeps hyphen at the beginning when it is a literal character', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: '-' }, + }, + ], + output: dedent` + /[abc-]/ + `, + code: dedent` + /[-abc]/ + `, + options: [options], + }) + }) + + it('does not move hyphen from edge to middle', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + ], + output: dedent` + /[axz-]/ + `, + code: dedent` + /[zxa-]/ + `, + options: [options], + }) + }) + + it('sorts character class with escaped hyphen normally', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + ], + output: dedent(String.raw` + /[axz\-]/ + `), + code: dedent(String.raw` + /[z\-xa]/ + `), + options: [options], + }) + }) + + it('does not create accidental range from literal hyphen', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'a', left: 'z' }, + }, + ], + output: dedent` + /[az-]/ + `, + code: dedent` + /[za-]/ + `, + options: [options], + }) + }) + + it('keeps literal hyphen last when mixed with character sets', async () => { + await valid({ + code: dedent(String.raw` + /[\w$-]/ + `), + options: [options], + }) + }) + + it('preserves range when hyphen is part of range', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '0-9', left: 'z' }, + }, + ], + output: dedent` + /[0-9a-fz]/ + `, + code: dedent` + /[z0-9a-f]/ + `, + options: [options], + }) + }) + + it('does not sort character classes with v flag (set operations)', async () => { + await valid({ + code: 
dedent(String.raw` + /[za]/v + `), + options: [options], + }) + }) + + it('does not sort character classes with v flag to avoid breaking set operations', async () => { + await valid({ + code: dedent` + /[zyxabc]/v + `, + options: [options], + }) + }) + + it('still sorts flags even with v flag present', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'g', left: 'v' }, + }, + ], + output: dedent` + /pattern/gv + `, + code: dedent` + /pattern/vg + `, + options: [options], + }) + }) + }) + + describe('line-length', () => { + let options = { + type: 'line-length', + order: 'desc', + } as const + + it('ignores non-regex literals', async () => { + await valid({ + code: dedent` + const sample = 'not a regular expression'; + `, + options: [options], + }) + }) + + it('sorts alternatives', async () => { + await invalid({ + errors: [{ messageId: 'unexpectedRegExpOrder' }], + output: dedent` + /(bbb|cc|a)/ + `, + code: dedent` + /(a|bbb|cc)/ + `, + options: [options], + }) + }) + + it('sorts character classes with ranges', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '0-9', left: 'z' }, + }, + ], + output: dedent` + /[0-9a-fz]/ + `, + code: dedent` + /[z0-9a-f]/ + `, + options: [options], + }) + }) + + it('sorts mixed character class elements', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: '0-9', left: 'z' }, + }, + ], + output: dedent` + /[0-9a-fA-Zz]/ + `, + code: dedent` + /[z0-9a-fA-Z]/ + `, + options: [options], + }) + }) + + it('keeps already sorted character classes', async () => { + await valid({ + code: dedent` + /[0-9A-Za-z]/ + `, + options: [options], + }) + }) + + it('sorts character classes with equivalent elements', async () => { + await invalid({ + errors: [ + { + data: { right: String.raw`\x61`, left: 'a' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[\x61a]/ 
+ `), + code: dedent(String.raw` + /[a\x61]/ + `), + options: [options], + }) + }) + + it('sorts character classes with astral characters', async () => { + await invalid({ + errors: [ + { + data: { + right: String.raw`\u{1F600}-\u{1F602}`, + left: String.raw`\u{1F603}`, + }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[\u{1F600}-\u{1F602}\u{1F603}]/u + `), + code: dedent(String.raw` + /[\u{1F603}\u{1F600}-\u{1F602}]/u + `), + options: [options], + }) + }) + + it('sorts regex literal inside function call', async () => { + await invalid({ + errors: [ + { + data: { right: 'apple', left: 'pear' }, + messageId: 'unexpectedRegExpOrder', + }, + { + data: { right: 'orange', left: 'apple' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + test(/(orange|apple|pear)/); + `, + code: dedent` + test(/(pear|apple|orange)/); + `, + options: [options], + }) + }) + + it('keeps already sorted alternatives', async () => { + await valid({ + code: dedent` + /(aaa|bb|c)/gi + `, + options: [options], + }) + }) + + it('sorts plain alternations', async () => { + await invalid({ + errors: [ + { + messageId: 'unexpectedRegExpOrder', + data: { right: 'aaa', left: 'c' }, + }, + ], + output: dedent` + /(aaa|bb|c)/ + `, + code: dedent` + /(c|aaa|bb)/ + `, + options: [options], + }) + }) + + it('skips sorting alternatives with shadowed numbers', async () => { + await valid({ + code: dedent` + /(20|1|10|2|3)/ + `, + options: [options], + }) + }) + + it('does not reorder alternatives when negated character class overlaps literal', async () => { + await valid({ + code: dedent` + /(?:\([^)]*\)|[^;])+/ + `, + options: [options], + }) + }) + + it('does not reorder @import alternatives with greedy negated class', async () => { + await valid({ + code: dedent(String.raw` + /@import(?:\s*url\([^)]*\).*?|[^;]*);/ + `), + options: [options], + }) + }) + + it('does not reorder alternatives when character class overlaps multi-character sequence', async () 
=> { + await valid({ + code: dedent(String.raw` + /\r\n|[\r\n\u2028\u2029]/ + `), + options: [options], + }) + }) + + it('does not reorder alternatives when optional sequence overlaps character class', async () => { + await valid({ + code: dedent(String.raw` + /\r?\n|[\r\n]/ + `), + options: [options], + }) + }) + + it('sorts alternatives with quantifiers', async () => { + await invalid({ + errors: [ + { + data: { right: 'a{2,4}', left: 'a?' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(a{2,4}|b*|a+|a?)/ + `, + code: dedent` + /(b*|a+|a?|a{2,4})/ + `, + options: [options], + }) + }) + + it('skips sorting when empty alternative can shadow others', async () => { + await valid({ + code: dedent` + /(b||a)/ + `, + options: [options], + }) + }) + + it('sorts named group alternatives', async () => { + await invalid({ + output: dedent` + /(?value|z)/ + `, + code: dedent` + /(?z|value)/ + `, + errors: [{ messageId: 'unexpectedRegExpOrder' }], + options: [options], + }) + }) + + it('sorts alternatives with lookahead assertions', async () => { + await invalid({ + errors: [ + { + data: { right: '(?=longTerm)', left: '(?=short)' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /((?=longTerm)|(?=short)|(?=x))/ + `, + code: dedent` + /((?=short)|(?=longTerm)|(?=x))/ + `, + options: [options], + }) + }) + + it('sorts alternatives with negative lookahead', async () => { + await invalid({ + errors: [ + { + data: { right: '(?!lengthy)', left: '(?!tiny)' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /((?!lengthy)|(?!tiny)|(?!x))/ + `, + code: dedent` + /((?!tiny)|(?!lengthy)|(?!x))/ + `, + options: [options], + }) + }) + + it('sorts character class elements', async () => { + await invalid({ + errors: [ + { + data: { left: String.raw`\w`, right: '0-9' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent(String.raw` + /[0-9\w]/ + `), + code: dedent(String.raw` + /[\w0-9]/ + `), + options: [options], 
+ }) + }) + + it('sorts alternatives with word boundaries', async () => { + await invalid({ + errors: [ + { + data: { + right: String.raw`\\bhippopotamus\\b`, + left: String.raw`\\bcat\\b`, + }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(\\bhippopotamus\\b|\\bcat\\b|\\bdog\\b)/ + `, + code: dedent` + /(\\bcat\\b|\\bhippopotamus\\b|\\bdog\\b)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with unicode characters', async () => { + await invalid({ + errors: [ + { + data: { right: 'Ωmega', left: 'β' }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /(Ωmega|Δelta|β|α)/ + `, + code: dedent` + /(β|Ωmega|Δelta|α)/ + `, + options: [options], + }) + }) + + it('respects asc order when configured', async () => { + await invalid({ + errors: [ + { messageId: 'unexpectedRegExpOrder' }, + { messageId: 'unexpectedRegExpOrder' }, + ], + options: [ + { + ...options, + order: 'asc', + }, + ], + output: dedent` + /(a|cc|bbb)/ + `, + code: dedent` + /(bbb|cc|a)/ + `, + }) + }) + + it('does not sort alternatives when one is a prefix of another', async () => { + await valid({ + code: dedent` + /(a|ab)/ + `, + options: [options], + }) + }) + + it('sorts alternatives with common prefix but no prefix relationship', async () => { + await invalid({ + errors: [{ messageId: 'unexpectedRegExpOrder' }], + output: dedent` + /(abcd|abd)/ + `, + code: dedent` + /(abd|abcd)/ + `, + options: [options], + }) + }) + + it('does not sort character classes with v flag (set operations)', async () => { + await valid({ + code: dedent(String.raw` + /[za]/v + `), + options: [options], + }) + }) + + it('does not sort character classes with v flag to avoid breaking set operations', async () => { + await valid({ + code: dedent` + /[zyxabc]/v + `, + options: [options], + }) + }) + }) + + describe('custom', () => { + let alphabet = Alphabet.generateRecommendedAlphabet() + .sortByLocaleCompare('en-US') + .getCharacters() + + let options = { + type: 
'custom', + order: 'asc', + alphabet, + } as const + + it('sorts elements in sets', async () => { + await valid({ + code: dedent` + /a|b|c|d/ + `, + options: [options], + }) + + await invalid({ + errors: [ + { + data: { + right: 'b', + left: 'c', + }, + messageId: 'unexpectedRegExpOrder', + }, + ], + output: dedent` + /a|b|c|d/ + `, + code: dedent` + /a|c|b|d/ + `, + options: [options], + }) + }) + }) + + describe('unsorted', () => { + let options = { + type: 'unsorted', + order: 'asc', + } as const + + it('respects unsorted type for character classes', async () => { + await valid({ + code: dedent` + /[zxa]/ + `, + options: [options], + }) + }) + + it('respects unsorted type for alternatives', async () => { + await valid({ + code: dedent` + /(c|a|b)/ + `, + options: [options], + }) + }) + + it('respects unsorted type for complex regex', async () => { + await valid({ + code: dedent` + /[zxa].*(c|a|b)/ + `, + options: [options], + }) + }) + }) + + describe('misc', () => { + it('validates the JSON schema', async () => { + await expect( + validateRuleJsonSchema(rule.meta.schema), + ).resolves.not.toThrow() + }) + + it('does not report when rule is disabled for entire file', async () => { + await valid({ + code: dedent` + /* eslint-disable rule-to-test/sort-regexp */ + /pattern/igmus + `, + }) + }) + + it('does not report when rule is disabled for next line', async () => { + await valid({ + code: dedent` + // eslint-disable-next-line rule-to-test/sort-regexp + /pattern/igmus + `, + }) + }) + + it('does not report when rule is disabled inline', async () => { + await valid({ + code: dedent` + /pattern/igmus // eslint-disable-line rule-to-test/sort-regexp + `, + }) + }) + + it('does not report for alternatives when disabled', async () => { + await valid({ + code: dedent` + /* eslint-disable rule-to-test/sort-regexp */ + /(c|b|a)/ + `, + }) + }) + + it('does not report for character classes when disabled', async () => { + await valid({ + code: dedent` + // 
eslint-disable-next-line rule-to-test/sort-regexp + /[zxa]/ + `, + }) + }) + }) +})