Add regexp/no-useless-assertions rule (#137)

ota-meshi · web-flow · commit 70393ef4274c · 2021-04-16T16:27:33.000+02:00
diff --git a/README.md b/README.md
@@ -103,6 +103,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
 | [regexp/no-potentially-useless-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-potentially-useless-backreference.html) | disallow backreferences that reference a group that might not be matched |  |
 | [regexp/no-trivially-nested-assertion](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-trivially-nested-assertion.html) | disallow trivially nested assertions | :wrench: |
 | [regexp/no-unused-capturing-group](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-unused-capturing-group.html) | disallow unused capturing group |  |
+| [regexp/no-useless-assertions](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-assertions.html) | disallow assertions that are known to always accept (or reject) |  |
 | [regexp/no-useless-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-backreference.html) | disallow useless backreferences in regular expressions | :star: |
 | [regexp/no-useless-character-class](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-character-class.html) | disallow character class with one character | :wrench: |
 | [regexp/no-useless-dollar-replacements](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-dollar-replacements.html) | disallow useless `$` replacements in replacement string |  |
diff --git a/docs/rules/README.md b/docs/rules/README.md
@@ -31,6 +31,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
 | [regexp/no-potentially-useless-backreference](./no-potentially-useless-backreference.md) | disallow backreferences that reference a group that might not be matched |  |
 | [regexp/no-trivially-nested-assertion](./no-trivially-nested-assertion.md) | disallow trivially nested assertions | :wrench: |
 | [regexp/no-unused-capturing-group](./no-unused-capturing-group.md) | disallow unused capturing group |  |
+| [regexp/no-useless-assertions](./no-useless-assertions.md) | disallow assertions that are known to always accept (or reject) |  |
 | [regexp/no-useless-backreference](./no-useless-backreference.md) | disallow useless backreferences in regular expressions | :star: |
 | [regexp/no-useless-character-class](./no-useless-character-class.md) | disallow character class with one character | :wrench: |
 | [regexp/no-useless-dollar-replacements](./no-useless-dollar-replacements.md) | disallow useless `$` replacements in replacement string |  |
diff --git a/docs/rules/no-useless-assertions.md b/docs/rules/no-useless-assertions.md
@@ -0,0 +1,58 @@
+---
+pageClass: "rule-details"
+sidebarDepth: 0
+title: "regexp/no-useless-assertions"
+description: "disallow assertions that are known to always accept (or reject)"
+---
+# regexp/no-useless-assertions
+
+> disallow assertions that are known to always accept (or reject)
+
+- :exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
+
+## :book: Rule Details
+
+Some assertions are unnecessary because the rest of the pattern forces them to
+always accept (or reject).
+
+<eslint-code-block>
+
+```js
+/* eslint regexp/no-useless-assertions: "error" */
+
+/* ✓ GOOD */
+var foo = /\bfoo\b/;
+
+/* ✗ BAD */
+var foo = /#\bfoo/;    // \b will always accept
+var foo = /foo\bbar/;  // \b will always reject
+var foo = /$foo/;      // $ will always reject
+var foo = /(?=\w)\d+/; // (?=\w) will always accept
+```
+
+</eslint-code-block>
+
+### Limitations
+
+Right now, this rule is implemented by looking only a single character ahead
+and behind. This is enough to determine whether the builtin assertions (`\b`,
+`\B`, `^`, `$`) trivially reject or accept, but it is not enough for all
+lookarounds. The algorithm determining the characters ahead and behind is very
+conservative, which can lead to false negatives.
+
+## :wrench: Options
+
+Nothing.
+
+## :heart: Compatibility
+
+This rule was taken from [eslint-plugin-clean-regex].  
+This rule is compatible with [clean-regex/no-unnecessary-assertions] rule.
+
+[eslint-plugin-clean-regex]: https://github.com/RunDevelopment/eslint-plugin-clean-regex
+[clean-regex/no-unnecessary-assertions]: https://github.com/RunDevelopment/eslint-plugin-clean-regex/blob/master/docs/rules/no-unnecessary-assertions.md
+
+## :mag: Implementation
+
+- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/no-useless-assertions.ts)
+- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/no-useless-assertions.ts)
diff --git a/lib/rules/no-useless-assertions.ts b/lib/rules/no-useless-assertions.ts
@@ -0,0 +1,316 @@
+import type { Expression } from "estree"
+import type { RegExpVisitor } from "regexpp/visitor"
+import type {
+    Assertion,
+    EdgeAssertion,
+    LookaroundAssertion,
+    WordBoundaryAssertion,
+} from "regexpp/ast"
+import {
+    createRule,
+    defineRegexpVisitor,
+    getRegexpLocation,
+    parseFlags,
+} from "../utils"
+import {
+    Chars,
+    getFirstCharAfter,
+    getFirstConsumedChar,
+    getLengthRange,
+    getMatchingDirectionFromAssertionKind,
+    hasSomeDescendant,
+    isPotentiallyEmpty,
+} from "regexp-ast-analysis"
+
+const messages = { // report templates keyed by messageId; `{{...}}` placeholders are filled from the report data
+    alwaysRejectByChar:
+        "'{{assertion}}' will always reject because it is {{followedOrPreceded}} by a character.",
+    alwaysRejectByNonLineTerminator:
+        "'{{assertion}}' will always reject because it is {{followedOrPreceded}} by a non-line-terminator character.",
+    alwaysAcceptByLineTerminator:
+        "'{{assertion}}' will always accept because it is {{followedOrPreceded}} by a line-terminator character.",
+    alwaysAcceptOrRejectFollowedByWord:
+        "'{{assertion}}' will always {{acceptOrReject}} because it is preceded by a non-word character and followed by a word character.",
+    alwaysAcceptOrRejectFollowedByNonWord:
+        "'{{assertion}}' will always {{acceptOrReject}} because it is preceded by a non-word character and followed by a non-word character.",
+    alwaysAcceptOrRejectPrecededByWordFollowedByNonWord:
+        "'{{assertion}}' will always {{acceptOrReject}} because it is preceded by a word character and followed by a non-word character.",
+    alwaysAcceptOrRejectPrecededByWordFollowedByWord:
+        "'{{assertion}}' will always {{acceptOrReject}} because it is preceded by a word character and followed by a word character.",
+    alwaysForLookaround:
+        "The {{kind}} '{{assertion}}' will always {{acceptOrReject}}.",
+    alwaysForNegativeLookaround:
+        "The negative {{kind}} '{{assertion}}' will always {{acceptOrReject}}.",
+}
+
+export default createRule("no-useless-assertions", {
+    meta: {
+        docs: {
+            description:
+                "disallow assertions that are known to always accept (or reject)",
+            // TODO Switch to recommended in the major version.
+            // recommended: true,
+            recommended: false,
+        },
+        schema: [],
+        messages,
+        type: "problem",
+    },
+    create(context) {
+        const sourceCode = context.getSourceCode()
+
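+        /**
+         * Create the regexp visitor that verifies every assertion of one pattern.
+         * @param node the expression the regexp belongs to; used as the report node
+         */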
+        function createVisitor(
+            node: Expression,
+            _pattern: string,
+            flagsStr: string,
+        ): RegExpVisitor.Handlers {
+            const flags = parseFlags(flagsStr)
+            const flagsWithoutDotAll = parseFlags(flagsStr.replace(/s/g, ""))
+
+            /** Report `assertion` at its location in the regexp; `data` is spread over the default `assertion: assertion.raw` entry. */
+            function report(
+                assertion: Assertion,
+                messageId: keyof typeof messages,
+                data: Record<string, string>,
+            ) {
+                context.report({
+                    node,
+                    loc: getRegexpLocation(sourceCode, node, assertion),
+                    messageId,
+                    data: {
+                        assertion: assertion.raw,
+                        ...data,
+                    },
+                })
+            }
+
+            /**
+             * Verify `^` or `$`: report it when the adjacent character makes it always reject or always accept.
+             */
+            function verifyStartOrEnd(assertion: EdgeAssertion): void {
+                // Note: /^/ is the same as /(?<!.)/s and /^/m is the same as /(?<!.)/
+                // Note: /$/ is the same as /(?!.)/s and /$/m is the same as /(?!.)/
+
+                // get the "next" character in the assertion's matching direction
+                const direction = getMatchingDirectionFromAssertionKind(
+                    assertion.kind,
+                )
+                const next = getFirstCharAfter(assertion, direction, flags)
+
+                const followedOrPreceded =
+                    assertion.kind === "end" ? "followed" : "preceded"
+
+                if (!next.edge) {
+                    // `next.edge` is false: there is always some character next to the assertion
+
+                    if (!flags.multiline) {
+                        // since the m flag isn't present any character will result in trivial rejection
+                        report(assertion, "alwaysRejectByChar", {
+                            followedOrPreceded,
+                        })
+                    } else {
+                        // a subset of /./ means trivial rejection; a character disjoint with /./ means trivial acceptance
+
+                        // with this little flag hack (flags without `s`), we can easily create the dot set.
+                        const dot = Chars.lineTerminator(
+                            flagsWithoutDotAll,
+                        ).negate()
+
+                        if (next.char.isSubsetOf(dot)) {
+                            report(
+                                assertion,
+                                "alwaysRejectByNonLineTerminator",
+                                { followedOrPreceded },
+                            )
+                        } else if (next.char.isDisjointWith(dot)) {
+                            report(assertion, "alwaysAcceptByLineTerminator", {
+                                followedOrPreceded,
+                            })
+                        }
+                    }
+                }
+            }
+
+            /**
+             * Verify `\b` or `\B`: report it when the characters before and after make it always accept or always reject.
+             */
+            function verifyWordBoundary(
+                assertion: WordBoundaryAssertion,
+            ): void {
+                const word = Chars.word(flags)
+
+                const next = getFirstCharAfter(assertion, "ltr", flags)
+                const prev = getFirstCharAfter(assertion, "rtl", flags)
+
+                if (prev.edge || next.edge) {
+                    // we can only do this analysis if we know both the previous and the next character
+                    return
+                }
+
+                const nextIsWord = next.char.isSubsetOf(word)
+                const prevIsWord = prev.char.isSubsetOf(word)
+                const nextIsNonWord = next.char.isDisjointWith(word)
+                const prevIsNonWord = prev.char.isDisjointWith(word)
+
+                // Note: /\b/ == /(?:(?<!\w)(?=\w)|(?<=\w)(?!\w))/  (other flags may apply)
+
+                // the idea here is that \B accepts when \b rejects and vice versa.
+                const accept = assertion.negate ? "reject" : "accept"
+                const reject = assertion.negate ? "accept" : "reject"
+
+                if (prevIsNonWord) {
+                    // current branch: /(?<!\w)(?=\w)/
+
+                    if (nextIsWord) {
+                        report(
+                            assertion,
+                            "alwaysAcceptOrRejectFollowedByWord",
+                            {
+                                acceptOrReject: accept,
+                            },
+                        )
+                    }
+                    if (nextIsNonWord) {
+                        report(
+                            assertion,
+                            "alwaysAcceptOrRejectFollowedByNonWord",
+                            {
+                                acceptOrReject: reject,
+                            },
+                        )
+                    }
+                }
+                if (prevIsWord) {
+                    // current branch: /(?<=\w)(?!\w)/
+
+                    if (nextIsNonWord) {
+                        report(
+                            assertion,
+                            "alwaysAcceptOrRejectPrecededByWordFollowedByNonWord",
+                            {
+                                acceptOrReject: accept,
+                            },
+                        )
+                    }
+                    if (nextIsWord) {
+                        report(
+                            assertion,
+                            "alwaysAcceptOrRejectPrecededByWordFollowedByWord",
+                            {
+                                acceptOrReject: reject,
+                            },
+                        )
+                    }
+                }
+            }
+
+            /**
+             * Verify a lookahead or lookbehind: report it when the adjacent character alone decides its outcome.
+             */
+            function verifyLookaround(assertion: LookaroundAssertion): void {
+                if (isPotentiallyEmpty(assertion.alternatives)) {
+                    // we don't handle trivial accept/reject based on emptiness
+                    return
+                }
+
+                const direction = getMatchingDirectionFromAssertionKind(
+                    assertion.kind,
+                )
+                const after = getFirstCharAfter(assertion, direction, flags)
+                if (after.edge) {
+                    return
+                }
+
+                const firstOf = getFirstConsumedChar(
+                    assertion.alternatives,
+                    direction,
+                    flags,
+                )
+                if (firstOf.empty) {
+                    return
+                }
+
+                // the idea here is that a negated lookaround accepts when the non-negated version rejects and vice versa.
+                const accept = assertion.negate ? "reject" : "accept"
+                const reject = assertion.negate ? "accept" : "reject"
+
+                // Careful now! If `exact` is false, we are only guaranteed to have a superset of the actual character.
+                // False negatives are fine but we can't have false positives.
+
+                if (after.char.isDisjointWith(firstOf.char)) {
+                    report(
+                        assertion,
+                        assertion.negate
+                            ? "alwaysForNegativeLookaround"
+                            : "alwaysForLookaround",
+                        {
+                            kind: assertion.kind,
+                            acceptOrReject: reject,
+                        },
+                    )
+                }
+
+                // accept is harder because that can't generally be decided by the first character
+
+                // if this contains another assertion then that might reject. It's out of our control
+                if (
+                    !hasSomeDescendant(
+                        assertion,
+                        (d) => d !== assertion && d.type === "Assertion",
+                    )
+                ) {
+                    const range = getLengthRange(assertion.alternatives)
+                    // we only check the first character, so it's only correct if the assertion requires only one
+                    // character
+                    if (range && range.max === 1) {
+                        // require exactness: an inexact `firstOf.char` is only a superset, so the subset test would prove nothing
+                        if (
+                            firstOf.exact &&
+                            after.char.isSubsetOf(firstOf.char)
+                        ) {
+                            report(
+                                assertion,
+                                assertion.negate
+                                    ? "alwaysForNegativeLookaround"
+                                    : "alwaysForLookaround",
+                                {
+                                    kind: assertion.kind,
+                                    acceptOrReject: accept,
+                                },
+                            )
+                        }
+                    }
+                }
+            }
+
+            return { // visitor handlers: route each assertion to the verifier for its kind
+                onAssertionEnter(assertion) {
+                    switch (assertion.kind) {
+                        case "start":
+                        case "end":
+                            verifyStartOrEnd(assertion)
+                            break
+
+                        case "word":
+                            verifyWordBoundary(assertion)
+                            break
+
+                        case "lookahead":
+                        case "lookbehind":
+                            verifyLookaround(assertion)
+                            break
+                        default: // any other assertion kind is not checked
+                    }
+                },
+            }
+        }
+
+        return defineRegexpVisitor(context, {
+            createVisitor,
+        })
+    },
+})
diff --git a/lib/utils/rules.ts b/lib/utils/rules.ts
diff --git a/tests/lib/rules/no-useless-assertions.ts b/tests/lib/rules/no-useless-assertions.ts