Add prefer-predefined-assertion rule (#171)

RunDevelopment · web-flow · commit 99009f367c7c · 2021-04-21T09:10:04.000+09:00
diff --git a/README.md b/README.md
@@ -125,6 +125,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
 | [regexp/prefer-escape-replacement-dollar-char](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-escape-replacement-dollar-char.html) | enforces escape of replacement `$` character (`$$`). |  |
 | [regexp/prefer-named-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-named-backreference.html) | enforce using named backreferences | :wrench: |
 | [regexp/prefer-plus-quantifier](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-plus-quantifier.html) | enforce using `+` quantifier | :star::wrench: |
+| [regexp/prefer-predefined-assertion](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-predefined-assertion.html) | prefer predefined assertion over equivalent lookarounds | :wrench: |
 | [regexp/prefer-quantifier](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-quantifier.html) | enforce using quantifier | :wrench: |
 | [regexp/prefer-question-quantifier](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-question-quantifier.html) | enforce using `?` quantifier | :star::wrench: |
 | [regexp/prefer-range](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-range.html) | enforce using character class range | :wrench: |
diff --git a/docs/rules/README.md b/docs/rules/README.md
@@ -53,6 +53,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
 | [regexp/prefer-escape-replacement-dollar-char](./prefer-escape-replacement-dollar-char.md) | enforces escape of replacement `$` character (`$$`). |  |
 | [regexp/prefer-named-backreference](./prefer-named-backreference.md) | enforce using named backreferences | :wrench: |
 | [regexp/prefer-plus-quantifier](./prefer-plus-quantifier.md) | enforce using `+` quantifier | :star::wrench: |
+| [regexp/prefer-predefined-assertion](./prefer-predefined-assertion.md) | prefer predefined assertion over equivalent lookarounds | :wrench: |
 | [regexp/prefer-quantifier](./prefer-quantifier.md) | enforce using quantifier | :wrench: |
 | [regexp/prefer-question-quantifier](./prefer-question-quantifier.md) | enforce using `?` quantifier | :star::wrench: |
 | [regexp/prefer-range](./prefer-range.md) | enforce using character class range | :wrench: |
diff --git a/docs/rules/prefer-predefined-assertion.md b/docs/rules/prefer-predefined-assertion.md
@@ -0,0 +1,43 @@
+---
+pageClass: "rule-details"
+sidebarDepth: 0
+title: "regexp/prefer-predefined-assertion"
+description: "prefer predefined assertion over equivalent lookarounds"
+---
+# regexp/prefer-predefined-assertion
+
+> prefer predefined assertion over equivalent lookarounds
+
+- :exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
+- :wrench: The `--fix` option on the [command line](https://eslint.org/docs/user-guide/command-line-interface#fixing-problems) can automatically fix some of the problems reported by this rule.
+
+## :book: Rule Details
+
+All predefined assertions (`\b`, `\B`, `^`, and `$`) can be expressed as lookaheads and lookbehinds. E.g. without the `m` flag, `/a$/` is the same as `/a(?![^])/`.
+
+In most cases, it's better to use the predefined assertions because they are better known.
+
+<eslint-code-block fix>
+
+```js
+/* eslint regexp/prefer-predefined-assertion: "error" */
+
+/* ✓ GOOD */
+var foo = /a(?=\W)/;
+
+/* ✗ BAD */
+var foo = /a(?![^])/;
+var foo = /a(?!\w)/;
+var foo = /a+(?!\w)(?:\s|bc+)+/;
+```
+
+</eslint-code-block>
+
+## :wrench: Options
+
+Nothing.
+
+## :mag: Implementation
+
+- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/prefer-predefined-assertion.ts)
+- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/prefer-predefined-assertion.ts)
diff --git a/lib/rules/prefer-predefined-assertion.ts b/lib/rules/prefer-predefined-assertion.ts
@@ -0,0 +1,227 @@
+import type { RegExpVisitor } from "regexpp/visitor"
+import type {
+    CharacterClass,
+    CharacterSet,
+    LookaroundAssertion,
+} from "regexpp/ast"
+import type { RegExpContext } from "../utils"
+import { createRule, defineRegexpVisitor } from "../utils"
+import {
+    Chars,
+    getFirstCharAfter,
+    getMatchingDirectionFromAssertionKind,
+    invertMatchingDirection,
+} from "regexp-ast-analysis"
+
+/**
+ * Extracts the sole element of a lookaround when that element is a character
+ * set or a character class.
+ *
+ * Returns `null` if the lookaround does not have exactly one alternative, if
+ * that alternative does not have exactly one element, or if the element is of
+ * any other type.
+ */
+function getCharacters(
+    lookaround: LookaroundAssertion,
+): CharacterSet | CharacterClass | null {
+    const { alternatives } = lookaround
+    if (alternatives.length !== 1) {
+        return null
+    }
+
+    const { elements } = alternatives[0]
+    if (elements.length !== 1) {
+        return null
+    }
+
+    const [only] = elements
+    switch (only.type) {
+        case "CharacterSet":
+        case "CharacterClass":
+            return only
+        default:
+            return null
+    }
+}
+
+/**
+ * Rule `prefer-predefined-assertion`.
+ *
+ * Reports lookaround assertions whose content is a single character set or
+ * character class and which can be rewritten as one of the predefined
+ * assertions `\b`, `\B`, `^` or `$`. Every report carries an autofix that
+ * replaces the lookaround with the predefined assertion.
+ */
+export default createRule("prefer-predefined-assertion", {
+    meta: {
+        docs: {
+            description:
+                "prefer predefined assertion over equivalent lookarounds",
+            // TODO Switch to recommended in the major version.
+            // recommended: true,
+            recommended: false,
+        },
+        fixable: "code",
+        schema: [],
+        messages: {
+            replace:
+                "This lookaround assertion can be replaced with {{kind}} ('{{expr}}').",
+        },
+        type: "suggestion", // "problem",
+    },
+    create(context) {
+        /**
+         * Create visitor
+         */
+        function createVisitor(
+            regexpContext: RegExpContext,
+        ): RegExpVisitor.Handlers {
+            const {
+                node,
+                flags,
+                getRegexpLocation,
+                toCharSet,
+                fixReplaceNode,
+            } = regexpContext
+
+            // The set of characters matched by \w under the current flags and
+            // its complement (\W). Both are computed once per regex.
+            const word = Chars.word(flags)
+            const nonWord = word.negate()
+
+            // /\b/ == /(?<!\w)(?=\w)|(?<=\w)(?!\w)/
+            // /\B/ == /(?<=\w)(?=\w)|(?<!\w)(?!\w)/
+
+            /**
+             * Tries to replace the given assertion with a word boundary
+             * assertion.
+             *
+             * @param aNode The lookaround whose content is equivalent to
+             * `\w` or `\W`.
+             * @param wordNegated `true` if the content is equivalent to `\W`,
+             * `false` if it is equivalent to `\w`.
+             */
+            function replaceWordAssertion(
+                aNode: LookaroundAssertion,
+                wordNegated: boolean,
+            ): void {
+                const direction = getMatchingDirectionFromAssertionKind(
+                    aNode.kind,
+                )
+
+                /**
+                 * Whether the lookaround is equivalent to (?!\w) / (?<!\w) or (?=\w) / (?<=\w)
+                 */
+                let lookaroundNegated = aNode.negate
+                if (wordNegated) {
+                    // if the lookaround only contains a \W, then we have to negate the lookaround, so it only
+                    // contains a \w. This is only possible iff we know that the pattern requires at least one
+                    // character after the lookaround (in the direction of the lookaround).
+                    //
+                    // Examples:
+                    // (?=\W) == (?!\w|$)   ; Here we need to eliminate the $ which can be done by proving that the
+                    //                        pattern matches another character after the lookahead. Example:
+                    // (?=\W).+ == (?!\w).+ ; Since we know that the lookahead is always followed by a dot, we
+                    //                        eliminate the $ alternative because it will always reject.
+                    // (?!\W).+ == (?=\w|$).+ == (?=\w).+
+
+                    const after = getFirstCharAfter(aNode, direction, flags)
+
+                    if (after.edge) {
+                        // we couldn't negate the \W, so it's not possible to convert the lookaround into a
+                        // predefined assertion
+                        return
+                    }
+
+                    // we can successfully negate the lookaround
+                    lookaroundNegated = !lookaroundNegated
+                }
+
+                // The character on the other side of the assertion, i.e. in
+                // the direction opposite to the one the lookaround looks in.
+                const before = getFirstCharAfter(
+                    aNode,
+                    invertMatchingDirection(direction),
+                    flags,
+                )
+                if (before.edge) {
+                    // to do the branch elimination necessary, we need to know the previous/next character
+                    return
+                }
+
+                let otherNegated: boolean
+                if (before.char.isSubsetOf(word)) {
+                    // we can think of the previous/next character as \w
+                    otherNegated = false
+                } else if (before.char.isSubsetOf(nonWord)) {
+                    // we can think of the previous/next character as \W
+                    otherNegated = true
+                } else {
+                    // the previous/next character is a subset of neither \w nor \W, so we can't do anything here
+                    return
+                }
+
+                // Both sides agreeing (\w next to \w, or \W next to \W) means
+                // there is no word boundary (\B); disagreeing means there is
+                // one (\b).
+                const isNegatedBoundary = lookaroundNegated === otherNegated
+                const kind = isNegatedBoundary
+                    ? "a negated word boundary assertion"
+                    : "a word boundary assertion"
+                const replacement = isNegatedBoundary ? "\\B" : "\\b"
+
+                context.report({
+                    node,
+                    loc: getRegexpLocation(aNode),
+                    messageId: "replace",
+                    data: { kind, expr: replacement },
+                    fix: fixReplaceNode(aNode, replacement),
+                })
+            }
+
+            /**
+             * Tries to replace the given assertion with an edge assertion.
+             *
+             * @param aNode The lookaround whose content matches either every
+             * character or every character except line terminators.
+             * @param lineAssertion `true` if the lookaround content does not
+             * match line terminators, which makes the negated lookaround
+             * behave like `^` / `$` under the `m` flag; `false` if the content
+             * matches every character, which makes it behave like `^` / `$`
+             * without the `m` flag.
+             */
+            function replaceEdgeAssertion(
+                aNode: LookaroundAssertion,
+                lineAssertion: boolean,
+            ): void {
+                if (!aNode.negate) {
+                    // only negated lookarounds are rewritten to ^ / $
+                    return
+                }
+                if (flags.multiline !== lineAssertion) {
+                    // ^ / $ mean something different under the current
+                    // `m` flag setting than this lookaround does
+                    return
+                }
+
+                const replacement = aNode.kind === "lookahead" ? "$" : "^"
+
+                context.report({
+                    node,
+                    loc: getRegexpLocation(aNode),
+                    messageId: "replace",
+                    data: { kind: "an edge assertion", expr: replacement },
+                    fix: fixReplaceNode(aNode, replacement),
+                })
+            }
+
+            return {
+                onAssertionEnter(aNode) {
+                    if (
+                        aNode.kind !== "lookahead" &&
+                        aNode.kind !== "lookbehind"
+                    ) {
+                        // this rule doesn't affect predefined assertions
+                        return
+                    }
+
+                    const chars = getCharacters(aNode)
+                    if (chars === null) {
+                        return
+                    }
+
+                    // Fast paths for \w, \W and the dot, which can be
+                    // classified without building a character set.
+                    if (chars.type === "CharacterSet") {
+                        if (chars.kind === "word") {
+                            replaceWordAssertion(aNode, chars.negate)
+                            return
+                        }
+                        if (chars.kind === "any") {
+                            // without the `s` flag the dot excludes line
+                            // terminators, so it acts as a line assertion
+                            replaceEdgeAssertion(aNode, !flags.dotAll)
+                            return
+                        }
+                    }
+
+                    // General case: compare the set of matched characters
+                    // against "everything", \w and \W.
+                    const charSet = toCharSet(chars)
+                    if (charSet.isAll) {
+                        replaceEdgeAssertion(aNode, false)
+                    } else if (charSet.equals(word)) {
+                        replaceWordAssertion(aNode, false)
+                    } else if (charSet.equals(nonWord)) {
+                        replaceWordAssertion(aNode, true)
+                    }
+                },
+            }
+        }
+
+        return defineRegexpVisitor(context, {
+            createVisitor,
+        })
+    },
+})
diff --git a/lib/utils/rules.ts b/lib/utils/rules.ts
@@ -41,6 +41,7 @@ import preferD from "../rules/prefer-d"
 import preferEscapeReplacementDollarChar from "../rules/prefer-escape-replacement-dollar-char"
 import preferNamedBackreference from "../rules/prefer-named-backreference"
 import preferPlusQuantifier from "../rules/prefer-plus-quantifier"
+import preferPredefinedAssertion from "../rules/prefer-predefined-assertion"
 import preferQuantifier from "../rules/prefer-quantifier"
 import preferQuestionQuantifier from "../rules/prefer-question-quantifier"
 import preferRange from "../rules/prefer-range"
@@ -96,6 +97,7 @@ export const rules = [
     preferEscapeReplacementDollarChar,
     preferNamedBackreference,
     preferPlusQuantifier,
+    preferPredefinedAssertion,
     preferQuantifier,
     preferQuestionQuantifier,
     preferRange,
diff --git a/tests/lib/rules/prefer-predefined-assertion.ts b/tests/lib/rules/prefer-predefined-assertion.ts
@@ -0,0 +1,95 @@
+import { RuleTester } from "eslint"
+import rule from "../../../lib/rules/prefer-predefined-assertion"
+
+const tester = new RuleTester({
+    parserOptions: { ecmaVersion: 2020, sourceType: "module" },
+})
+
+// Expected report messages, shared by the cases below.
+const WORD_BOUNDARY =
+    "This lookaround assertion can be replaced with a word boundary assertion ('\\b')."
+const NEGATED_WORD_BOUNDARY =
+    "This lookaround assertion can be replaced with a negated word boundary assertion ('\\B')."
+const END_EDGE =
+    "This lookaround assertion can be replaced with an edge assertion ('$')."
+const START_EDGE =
+    "This lookaround assertion can be replaced with an edge assertion ('^')."
+
+// Each row is: input code, expected autofix output, expected single message.
+const invalidCases: [string, string, string][] = [
+    // lookarounds containing \w
+    [String.raw`/a(?=\w)/`, String.raw`/a\B/`, NEGATED_WORD_BOUNDARY],
+    [String.raw`/a(?!\w)/`, String.raw`/a\b/`, WORD_BOUNDARY],
+    [String.raw`/(?<=\w)a/`, String.raw`/\Ba/`, NEGATED_WORD_BOUNDARY],
+    [String.raw`/(?<!\w)a/`, String.raw`/\ba/`, WORD_BOUNDARY],
+
+    // lookarounds containing \W with a character on both sides
+    [String.raw`/a(?=\W)./`, String.raw`/a\b./`, WORD_BOUNDARY],
+    [String.raw`/a(?!\W)./`, String.raw`/a\B./`, NEGATED_WORD_BOUNDARY],
+    [String.raw`/.(?<=\W)a/`, String.raw`/.\ba/`, WORD_BOUNDARY],
+    [String.raw`/.(?<!\W)a/`, String.raw`/.\Ba/`, NEGATED_WORD_BOUNDARY],
+
+    // lookaround followed by a group
+    [
+        String.raw`/a+(?!\w)(?:\s|bc+)+/`,
+        String.raw`/a+\b(?:\s|bc+)+/`,
+        WORD_BOUNDARY,
+    ],
+
+    // edge assertions: only the lookaround matching the `m` flag is replaced
+    [String.raw`/(?!.)(?![^])/`, String.raw`/(?!.)$/`, END_EDGE],
+    [String.raw`/(?<!.)(?<![^])/m`, String.raw`/^(?<![^])/m`, START_EDGE],
+]
+
+tester.run("prefer-predefined-assertion", rule as any, {
+    valid: [String.raw`/a(?=\W)/`],
+    invalid: invalidCases.map(([code, output, message]) => ({
+        code,
+        output,
+        errors: [message],
+    })),
+})