|
1 | 1 | import type * as ESTree from "estree"
|
2 | 2 | import type { RuleListener, RuleModule, PartialRuleModule } from "../types"
|
3 | 3 | import type { RegExpVisitor } from "regexpp/visitor"
|
4 |
| -import type { Alternative, Element, Node, Quantifier } from "regexpp/ast" |
| 4 | +import type { Element, Node, Quantifier } from "regexpp/ast" |
5 | 5 | import { RegExpParser, visitRegExpAST } from "regexpp"
|
6 | 6 | import {
|
7 | 7 | CALL,
|
@@ -752,90 +752,111 @@ export function quantToString(quant: Readonly<Quant>): string {
|
752 | 752 |
|
753 | 753 | /* eslint-disable complexity -- X( */
|
754 | 754 | /**
|
755 |
| - * Check the siblings to see if the regex doesn't change when unwrapped. |
| 755 | + * Returns whether the concatenation of the two strings might create new escape |
| 756 | + * sequences or elements. |
756 | 757 | */
|
757 |
| -export function canUnwrapped( |
| 758 | +function mightCreateNewElement( |
758 | 759 | /* eslint-enable complexity -- X( */
|
759 |
| - node: Element, |
760 |
| - text: string, |
| 760 | + before: string, |
| 761 | + after: string, |
761 | 762 | ): boolean {
|
762 |
| - const parent = node.parent |
763 |
| - let target: Element, alternative: Alternative |
764 |
| - if (parent.type === "Quantifier") { |
765 |
| - alternative = parent.parent |
766 |
| - target = parent |
767 |
| - } else if (parent.type === "Alternative") { |
768 |
| - alternative = parent |
769 |
| - target = node |
770 |
| - } else { |
| 763 | + // control |
| 764 | + // \cA |
| 765 | + if (before.endsWith("\\c") && /^[a-z]/i.test(after)) { |
771 | 766 | return true
|
772 | 767 | }
|
773 |
| - const index = alternative.elements.indexOf(target) |
774 |
| - if (index === 0) { |
| 768 | + |
| 769 | + // hexadecimal |
| 770 | + // \xFF \uFFFF |
| 771 | + if ( |
| 772 | + /(?:^|[^\\])(?:\\{2})*\\(?:x[\dA-Fa-f]?|u[\dA-Fa-f]{0,3})$/.test( |
| 773 | + before, |
| 774 | + ) && |
| 775 | + /^[\da-f]/i.test(after) |
| 776 | + ) { |
775 | 777 | return true
|
776 | 778 | }
|
777 |
| - if (/^\d+$/u.test(text)) { |
778 |
| - let prevIndex = index - 1 |
779 |
| - let prev = alternative.elements[prevIndex] |
780 |
| - if (prev.type === "Backreference") { |
781 |
| - // e.g. /()\1[0]/ -> /()\10/ |
782 |
| - return false |
783 |
| - } |
784 | 779 |
|
785 |
| - while ( |
786 |
| - prev.type === "Character" && |
787 |
| - /^\d+$/u.test(prev.raw) && |
788 |
| - prevIndex > 0 |
789 |
| - ) { |
790 |
| - prevIndex-- |
791 |
| - prev = alternative.elements[prevIndex] |
792 |
| - } |
793 |
| - if (prev.type === "Character" && prev.raw === "{") { |
794 |
| - // e.g. /a{[0]}/ -> /a{0}/ |
795 |
| - return false |
796 |
| - } |
| 780 | + // unicode |
| 781 | + // \u{FFFF} |
| 782 | + if ( |
| 783 | + (/(?:^|[^\\])(?:\\{2})*\\u$/.test(before) && |
| 784 | + /^\{[\da-f]*(?:\}[\s\S]*)?$/i.test(after)) || |
| 785 | + (/(?:^|[^\\])(?:\\{2})*\\u\{[\da-f]*$/.test(before) && |
| 786 | + /^(?:[\da-f]+\}?|\})/i.test(after)) |
| 787 | + ) { |
| 788 | + return true |
797 | 789 | }
|
798 |
| - if (/^[0-7]+$/u.test(text)) { |
799 |
| - const prev = alternative.elements[index - 1] |
800 |
| - if (prev.type === "Character" && /^\\[0-7]+$/u.test(prev.raw)) { |
801 |
| - // e.g. /\0[1]/ -> /\01/ |
802 |
| - return false |
803 |
| - } |
| 790 | + |
| 791 | + // octal |
| 792 | + // \077 \123 |
| 793 | + if ( |
| 794 | + (/(?:^|[^\\])(?:\\{2})*\\0[0-7]?$/.test(before) && |
| 795 | + /^[0-7]/.test(after)) || |
| 796 | + (/(?:^|[^\\])(?:\\{2})*\\[1-7]$/.test(before) && /^[0-7]/.test(after)) |
| 797 | + ) { |
| 798 | + return true |
804 | 799 | }
|
805 |
| - if (/^[\da-f]+$/iu.test(text)) { |
806 |
| - let prevIndex = index - 1 |
807 |
| - let prev = alternative.elements[prevIndex] |
808 |
| - while ( |
809 |
| - prev.type === "Character" && |
810 |
| - /^[\da-f]+$/iu.test(prev.raw) && |
811 |
| - prevIndex > 0 |
812 |
| - ) { |
813 |
| - prevIndex-- |
814 |
| - prev = alternative.elements[prevIndex] |
815 |
| - } |
816 |
| - if ( |
817 |
| - prev.type === "Character" && |
818 |
| - (prev.raw === "\\x" || prev.raw === "\\u") |
819 |
| - ) { |
820 |
| - // e.g. /\xF[F]/ -> /\xFF/ |
821 |
| - // e.g. /\uF[F]FF/ -> /\uFFFF/ |
822 |
| - return false |
823 |
| - } |
| 800 | + |
| 801 | + // backreference |
| 802 | + // \12 \k<foo> |
| 803 | + if ( |
| 804 | + (/(?:^|[^\\])(?:\\{2})*\\[1-9]\d*$/.test(before) && |
| 805 | + /^\d/.test(after)) || |
| 806 | + (/(?:^|[^\\])(?:\\{2})*\\k$/.test(before) && after.startsWith("<")) || |
| 807 | + /(?:^|[^\\])(?:\\{2})*\\k<[^<>]*$/.test(before) |
| 808 | + ) { |
| 809 | + return true |
824 | 810 | }
|
825 |
| - if (/^[a-z]+$/iu.test(text)) { |
826 |
| - if (index > 1) { |
827 |
| - const prev = alternative.elements[index - 1] |
828 |
| - if (prev.type === "Character" && prev.raw === "c") { |
829 |
| - const prev2 = alternative.elements[index - 2] |
830 |
| - if (prev2.type === "Character" && prev2.raw === "\\") { |
831 |
| - // e.g. /\c[M]/ -> /\cM/ |
832 |
| - return false |
833 |
| - } |
834 |
| - } |
835 |
| - } |
| 811 | + |
| 812 | + // property |
| 813 | + // \p{L} \P{L} |
| 814 | + if ( |
| 815 | + (/(?:^|[^\\])(?:\\{2})*\\p$/i.test(before) && |
| 816 | + /^\{[\w=]*(?:\}[\s\S]*)?$/.test(after)) || |
| 817 | + (/(?:^|[^\\])(?:\\{2})*\\p\{[\w=]*$/i.test(before) && |
| 818 | + /^[\w=]+(?:\}[\s\S]*)?$|^\}/.test(after)) |
| 819 | + ) { |
| 820 | + return true |
836 | 821 | }
|
837 | 822 |
|
838 |
| - return true |
| 823 | + // quantifier |
| 824 | + // {1} {2,} {2,3} |
| 825 | + if ( |
| 826 | + (/(?:^|[^\\])(?:\\{2})*\{\d*$/.test(before) && /^[\d,}]/.test(after)) || |
| 827 | + (/(?:^|[^\\])(?:\\{2})*\{\d+,$/.test(before) && |
| 828 | + /^(?:\d+(?:\}|$)|\})/.test(after)) || |
| 829 | + (/(?:^|[^\\])(?:\\{2})*\{\d+,\d*$/.test(before) && |
| 830 | + after.startsWith("}")) |
| 831 | + ) { |
| 832 | + return true |
| 833 | + } |
| 834 | + |
| 835 | + return false |
| 836 | +} |
| 837 | + |
| 838 | +/** |
| 839 | + * Check the siblings to see if the regex doesn't change when unwrapped. |
| 840 | + */ |
| 841 | +export function canUnwrapped(node: Element, text: string): boolean { |
| 842 | + let textBefore, textAfter |
| 843 | + |
| 844 | + const parent = node.parent |
| 845 | + if (parent.type === "Alternative") { |
| 846 | + textBefore = parent.raw.slice(0, node.start - parent.start) |
| 847 | + textAfter = parent.raw.slice(node.end - parent.start) |
| 848 | + } else if (parent.type === "Quantifier") { |
| 849 | + const alt = parent.parent |
| 850 | + textBefore = alt.raw.slice(0, node.start - alt.start) |
| 851 | + textAfter = alt.raw.slice(node.end - alt.start) |
| 852 | + } else { |
| 853 | + return true |
| 854 | + } |
| 855 | + |
| 856 | + return ( |
| 857 | + !mightCreateNewElement(textBefore, text) && |
| 858 | + !mightCreateNewElement(text, textAfter) |
| 859 | + ) |
839 | 860 | }
|
840 | 861 |
|
841 | 862 | /**
|
|
0 commit comments