
Commit 3e2e1ad

Revert "refactor(compiler): tokenize regular expression literals (angular#63857)"
This reverts commit 8a69c06.
1 parent e144222 commit 3e2e1ad

2 files changed: +1 −244 lines changed

packages/compiler/src/expression_parser/lexer.ts

Lines changed: 0 additions & 95 deletions
@@ -16,8 +16,6 @@ export enum TokenType {
   String,
   Operator,
   Number,
-  RegExpBody,
-  RegExpFlags,
   Error,
 }

@@ -130,14 +128,6 @@ export class Token {
     return this.type === TokenType.Error;
   }
 
-  isRegExpBody(): boolean {
-    return this.type === TokenType.RegExpBody;
-  }
-
-  isRegExpFlags(): boolean {
-    return this.type === TokenType.RegExpFlags;
-  }
-
   toNumber(): number {
     return this.type === TokenType.Number ? this.numValue : -1;
   }
@@ -169,8 +159,6 @@ export class Token {
       case TokenType.PrivateIdentifier:
       case TokenType.String:
       case TokenType.Error:
-      case TokenType.RegExpBody:
-      case TokenType.RegExpFlags:
         return this.strValue;
       case TokenType.Number:
         return this.numValue.toString();
@@ -219,14 +207,6 @@ function newErrorToken(index: number, end: number, message: string): Token {
   return new Token(index, end, TokenType.Error, 0, message);
 }
 
-function newRegExpBodyToken(index: number, end: number, text: string): Token {
-  return new Token(index, end, TokenType.RegExpBody, 0, text);
-}
-
-function newRegExpFlagsToken(index: number, end: number, text: string): Token {
-  return new Token(index, end, TokenType.RegExpFlags, 0, text);
-}
-
 export const EOF: Token = new Token(-1, -1, TokenType.Character, 0, '');
 
 class _Scanner {
@@ -320,9 +300,6 @@ class _Scanner {
       case chars.$MINUS:
         return this.scanComplexOperator(start, '-', chars.$EQ, '=');
       case chars.$SLASH:
-        if (this.isStartOfRegex()) {
-          return this.scanRegex(index);
-        }
         return this.scanComplexOperator(start, '/', chars.$EQ, '=');
       case chars.$PERCENT:
         return this.scanComplexOperator(start, '%', chars.$EQ, '=');
@@ -629,78 +606,6 @@ class _Scanner {
 
     return newOperatorToken(start, this.index, operator);
   }
-
-  private isStartOfRegex(): boolean {
-    if (this.tokens.length === 0) {
-      return true;
-    }
-
-    const lastToken = this.tokens[this.tokens.length - 1];
-
-    return (
-      !lastToken.isIdentifier() &&
-      !lastToken.isPrivateIdentifier() &&
-      !lastToken.isNumber() &&
-      !lastToken.isString() &&
-      !lastToken.isKeyword() &&
-      !lastToken.isCharacter(chars.$RPAREN) &&
-      !lastToken.isCharacter(chars.$RBRACKET)
-    );
-  }
-
-  private scanRegex(tokenStart: number): Token {
-    this.advance();
-    const textStart = this.index;
-    let inEscape = false;
-    let inCharacterClass = false;
-
-    while (true) {
-      const peek = this.peek;
-
-      if (peek === chars.$EOF) {
-        return this.error('Unterminated regular expression', 0);
-      }
-
-      if (inEscape) {
-        inEscape = false;
-      } else if (peek === chars.$BACKSLASH) {
-        inEscape = true;
-      } else if (peek === chars.$LBRACKET) {
-        inCharacterClass = true;
-      } else if (peek === chars.$RBRACKET) {
-        inCharacterClass = false;
-      } else if (peek === chars.$SLASH && !inCharacterClass) {
-        break;
-      }
-      this.advance();
-    }
-
-    // Note that we want the text without the slashes,
-    // but we still want the slashes to be part of the span.
-    const value = this.input.substring(textStart, this.index);
-    this.advance();
-    const bodyToken = newRegExpBodyToken(tokenStart, this.index, value);
-    const flagsToken = this.scanRegexFlags(this.index);
-
-    if (flagsToken !== null) {
-      this.tokens.push(bodyToken);
-      return flagsToken;
-    }
-
-    return bodyToken;
-  }
-
-  private scanRegexFlags(start: number): Token | null {
-    if (!chars.isAsciiLetter(this.peek)) {
-      return null;
-    }
-
-    while (chars.isAsciiLetter(this.peek)) {
-      this.advance();
-    }
-
-    return newRegExpFlagsToken(start, this.index, this.input.substring(start, this.index));
-  }
 }
 
 function isIdentifierStart(code: number): boolean {
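
For context on what this revert removes: the deleted scanner code treated a '/' as the start of a regular expression literal only when the previous token could not end an operand (an identifier, private identifier, number, string, keyword, ')' or ']'), and then consumed characters up to the closing slash while tracking backslash escapes and character classes. The TypeScript below is a minimal standalone sketch of that heuristic; SimpleToken, startsRegex and scanRegexBody are illustrative names and a simplified token model, not part of the compiler's lexer API.

// Standalone sketch of the reverted heuristic (simplified; the real lexer also
// excluded private identifiers via its own Token/char-code types).
type SimpleToken = {
  kind: 'identifier' | 'number' | 'string' | 'keyword' | 'operator' | 'character';
  text: string;
};

// A '/' can begin a regex literal only if the previous token cannot end an operand.
function startsRegex(previous: SimpleToken | undefined): boolean {
  if (previous === undefined) return true;
  if (['identifier', 'number', 'string', 'keyword'].includes(previous.kind)) return false;
  return !(previous.kind === 'character' && (previous.text === ')' || previous.text === ']'));
}

// Scan a regex body starting at the opening '/'. Returns the index just past the
// closing '/', or -1 if the literal is unterminated. Escapes and character classes
// are tracked so that '\/' and '[/]' do not terminate the literal.
function scanRegexBody(input: string, start: number): number {
  let inEscape = false;
  let inCharacterClass = false;
  for (let i = start + 1; i < input.length; i++) {
    const ch = input[i];
    if (inEscape) {
      inEscape = false;
    } else if (ch === '\\') {
      inEscape = true;
    } else if (ch === '[') {
      inCharacterClass = true;
    } else if (ch === ']') {
      inCharacterClass = false;
    } else if (ch === '/' && !inCharacterClass) {
      return i + 1; // position just past the closing slash
    }
  }
  return -1; // unterminated regular expression
}

// Examples mirroring the deleted tests:
console.log(scanRegexBody('/[a/]$/', 0)); // 7 (the '/' inside '[...]' does not close it)
console.log(startsRegex({kind: 'character', text: ')'})); // false -> '/' is division

With the revert applied, the case for chars.$SLASH in the production scanner no longer performs this check and '/' is always scanned as an operator.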

packages/compiler/test/expression_parser/lexer_spec.ts

Lines changed: 1 addition & 149 deletions
@@ -72,18 +72,6 @@ function expectErrorToken(token: Token, index: any, end: number, message: string
   expect(token.toString()).toEqual(message);
 }
 
-function expectRegExpBodyToken(token: any, index: number, end: number, str: string) {
-  expectToken(token, index, end);
-  expect(token.isRegExpBody()).toBe(true);
-  expect(token.toString()).toEqual(str);
-}
-
-function expectRegExpFlagsToken(token: any, index: number, end: number, str: string) {
-  expectToken(token, index, end);
-  expect(token.isRegExpFlags()).toBe(true);
-  expect(token.toString()).toEqual(str);
-}
-
 describe('lexer', () => {
   describe('token', () => {
     it('should tokenize a simple identifier', () => {
@@ -422,7 +410,7 @@ describe('lexer', () => {
       expectOperatorToken(lex('+=')[0], 0, 2, '+=');
       expectOperatorToken(lex('-=')[0], 0, 2, '-=');
       expectOperatorToken(lex('*=')[0], 0, 2, '*=');
-      expectOperatorToken(lex('a /= b')[1], 2, 4, '/=');
+      expectOperatorToken(lex('/=')[0], 0, 2, '/=');
       expectOperatorToken(lex('%=')[0], 0, 2, '%=');
       expectOperatorToken(lex('**=')[0], 0, 3, '**=');
       expectOperatorToken(lex('&&=')[0], 0, 3, '&&=');
@@ -685,141 +673,5 @@ describe('lexer', () => {
         expectStringToken(tokens[6], 23, 24, '', StringTokenKind.TemplateLiteralEnd);
       });
     });
-
-    describe('regular expressions', () => {
-      it('should tokenize a simple regex', () => {
-        const tokens: Token[] = lex('/abc/');
-        expect(tokens.length).toBe(1);
-        expectRegExpBodyToken(tokens[0], 0, 5, 'abc');
-      });
-
-      it('should tokenize a regex with flags', () => {
-        const tokens: Token[] = lex('/abc/gim');
-        expect(tokens.length).toBe(2);
-        expectRegExpBodyToken(tokens[0], 0, 5, 'abc');
-        expectRegExpFlagsToken(tokens[1], 5, 8, 'gim');
-      });
-
-      it('should tokenize an identifier immediately after a regex', () => {
-        const tokens: Token[] = lex('/abc/ g');
-        expect(tokens.length).toBe(2);
-        expectRegExpBodyToken(tokens[0], 0, 5, 'abc');
-        expectIdentifierToken(tokens[1], 6, 7, 'g');
-      });
-
-      it('should tokenize a regex with an escaped slashes', () => {
-        const tokens: Token[] = lex('/^http:\\/\\/foo\\.bar/');
-        expect(tokens.length).toBe(1);
-        expectRegExpBodyToken(tokens[0], 0, 20, '^http:\\/\\/foo\\.bar');
-      });
-
-      it('should tokenize a regex with un-escaped slashes in a character class', () => {
-        const tokens: Token[] = lex('/[a/]$/');
-        expect(tokens.length).toBe(1);
-        expectRegExpBodyToken(tokens[0], 0, 7, '[a/]$');
-      });
-
-      it('should tokenize a regex with a backslash', () => {
-        const tokens: Token[] = lex('/a\\w+/');
-        expect(tokens.length).toBe(1);
-        expectRegExpBodyToken(tokens[0], 0, 6, 'a\\w+');
-      });
-
-      it('should tokenize a method call on a regex', () => {
-        const tokens: Token[] = lex('/abc/.test("foo")');
-        expect(tokens.length).toBe(6);
-        expectRegExpBodyToken(tokens[0], 0, 5, 'abc');
-        expectCharacterToken(tokens[1], 5, 6, '.');
-        expectIdentifierToken(tokens[2], 6, 10, 'test');
-        expectCharacterToken(tokens[3], 10, 11, '(');
-        expectStringToken(tokens[4], 11, 16, 'foo', StringTokenKind.Plain);
-        expectCharacterToken(tokens[5], 16, 17, ')');
-      });
-
-      it('should tokenize a method call with a regex parameter', () => {
-        const tokens: Token[] = lex('"foo".match(/abc/)');
-        expect(tokens.length).toBe(6);
-        expectStringToken(tokens[0], 0, 5, 'foo', StringTokenKind.Plain);
-        expectCharacterToken(tokens[1], 5, 6, '.');
-        expectIdentifierToken(tokens[2], 6, 11, 'match');
-        expectCharacterToken(tokens[3], 11, 12, '(');
-        expectRegExpBodyToken(tokens[4], 12, 17, 'abc');
-        expectCharacterToken(tokens[5], 17, 18, ')');
-      });
-
-      it('should not tokenize a regex preceded by a square bracket', () => {
-        const tokens: Token[] = lex('a[0] /= b');
-        expect(tokens.length).toBe(6);
-        expectIdentifierToken(tokens[0], 0, 1, 'a');
-        expectCharacterToken(tokens[1], 1, 2, '[');
-        expectNumberToken(tokens[2], 2, 3, 0);
-        expectCharacterToken(tokens[3], 3, 4, ']');
-        expectOperatorToken(tokens[4], 5, 7, '/=');
-        expectIdentifierToken(tokens[5], 8, 9, 'b');
-      });
-
-      it('should not tokenize a regex preceded by an identifier', () => {
-        const tokens: Token[] = lex('a / b');
-        expect(tokens.length).toBe(3);
-        expectIdentifierToken(tokens[0], 0, 1, 'a');
-        expectOperatorToken(tokens[1], 2, 3, '/');
-        expectIdentifierToken(tokens[2], 4, 5, 'b');
-      });
-
-      it('should not tokenize a regex preceded by a number', () => {
-        const tokens: Token[] = lex('1 / b');
-        expect(tokens.length).toBe(3);
-        expectNumberToken(tokens[0], 0, 1, 1);
-        expectOperatorToken(tokens[1], 2, 3, '/');
-        expectIdentifierToken(tokens[2], 4, 5, 'b');
-      });
-
-      it('should not tokenize a regex that is preceded by a string', () => {
-        const tokens: Token[] = lex('"a" / b');
-        expect(tokens.length).toBe(3);
-        expectStringToken(tokens[0], 0, 3, 'a', StringTokenKind.Plain);
-        expectOperatorToken(tokens[1], 4, 5, '/');
-        expectIdentifierToken(tokens[2], 6, 7, 'b');
-      });
-
-      it('should not tokenize a regex preceded by a closing parenthesis', () => {
-        const tokens: Token[] = lex('(a) / b');
-        expect(tokens.length).toBe(5);
-        expectCharacterToken(tokens[0], 0, 1, '(');
-        expectIdentifierToken(tokens[1], 1, 2, 'a');
-        expectCharacterToken(tokens[2], 2, 3, ')');
-        expectOperatorToken(tokens[3], 4, 5, '/');
-        expectIdentifierToken(tokens[4], 6, 7, 'b');
-      });
-
-      it('should not tokenize a regex that is preceded by a keyword', () => {
-        const tokens: Token[] = lex('this / b');
-        expect(tokens.length).toBe(3);
-        expectKeywordToken(tokens[0], 0, 4, 'this');
-        expectOperatorToken(tokens[1], 5, 6, '/');
-        expectIdentifierToken(tokens[2], 7, 8, 'b');
-      });
-
-      it('should produce an error for an unterminated regex', () => {
-        expectErrorToken(
-          lex('/a')[0],
-          2,
-          2,
-          'Lexer Error: Unterminated regular expression at column 2 in expression [/a]',
-        );
-      });
-
-      it('should produce an error for a incorrectly-escaped regex', () => {
-        const tokens = lex('/a\\\\//');
-        expect(tokens.length).toBe(2);
-        expectRegExpBodyToken(tokens[0], 0, 5, 'a\\\\');
-        expectErrorToken(
-          tokens[1],
-          6,
-          6,
-          'Lexer Error: Unterminated regular expression at column 6 in expression [/a\\\\//]',
-        );
-      });
-    });
   });
 });
