Skip to content

Commit 5f90640

Browse files
author
Kenji Fukuda
committed
Refactoring RegExp parsing and modifying SyntaxError message
1 parent c0f7bc4 commit 5f90640

File tree

3 files changed

+23
-14
lines changed

3 files changed

+23
-14
lines changed

lib/Parser/RegexParser.cpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2157,16 +2157,19 @@ namespace UnifiedRegex
21572157

21582158
if (returnedNode->tag == Node::MatchSet)
21592159
{
2160-
if (unicodeFlagPresent && pendingRangeStart != INVALID_CODEPOINT)
2161-
{
2162-
Fail(JSERR_UnicodeRegExpRangeContainsCharClass);
2163-
}
2164-
2165-
pendingCodePoint = INVALID_CODEPOINT;
21662160
if (pendingRangeStart != INVALID_CODEPOINT)
21672161
{
2162+
if (unicodeFlagPresent)
2163+
{
2164+
//We have a range containing a character class and the unicode flag is present, thus we end up having to throw a "Syntax" error here
2165+
//This breaks the notion of the Pass0 check for valid syntax, because at that time the unicode flag is unknown.
2166+
Fail(JSERR_UnicodeRegExpRangeContainsCharClass); //From #sec-patterns-static-semantics-early-errors-annexb
2167+
}
2168+
21682169
codePointSet.Set(ctAllocator, '-');
21692170
}
2171+
2172+
pendingCodePoint = INVALID_CODEPOINT;
21702173
pendingRangeStart = INVALID_CODEPOINT;
21712174
codePointSet.UnionInPlace(ctAllocator, deferredSetNode.set);
21722175
currIsACharSet = true;
@@ -2201,18 +2204,21 @@ namespace UnifiedRegex
22012204

22022205
if (codePointToSet != INVALID_CODEPOINT || prevprevWasACharSetAndPartOfRange)
22032206
{
2204-
if (unicodeFlagPresent && prevprevWasACharSetAndPartOfRange)
2205-
{
2206-
Fail(JSERR_UnicodeRegExpRangeContainsCharClass);
2207-
}
2208-
else if (prevprevWasACharSetAndPartOfRange)
2207+
if (prevprevWasACharSetAndPartOfRange)
22092208
{
2209+
//We have a range containing a character class and the unicode flag is present, thus we end up having to throw a "Syntax" error here
2210+
//This breaks the notion of the Pass0 check for valid syntax, because at that time the unicode flag is unknown.
2211+
if (unicodeFlagPresent)
2212+
{
2213+
Fail(JSERR_UnicodeRegExpRangeContainsCharClass);
2214+
}
2215+
22102216
if (pendingCodePoint != INVALID_CODEPOINT)
22112217
{
22122218
codePointSet.Set(ctAllocator, pendingCodePoint);
22132219
}
22142220

2215-
codePointSet.Set(ctAllocator, '-');
2221+
codePointSet.Set(ctAllocator, '-'); //Add '-' to set because a range was detected but turned out to be a union of character set with '-' and another atom.
22162222
pendingRangeStart = pendingCodePoint = INVALID_CODEPOINT;
22172223
}
22182224
else if (pendingRangeStart != INVALID_CODEPOINT)

lib/Parser/rterrors.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ RT_ERROR_MSG(JSERR_NoAccessors, 5673, "Invalid property descriptor: accessors no
366366
RT_ERROR_MSG(JSERR_RegExpInvalidEscape, 5674, "", "Invalid regular expression: invalid escape in unicode pattern", kjstSyntaxError, 0)
367367
RT_ERROR_MSG(JSERR_RegExpTooManyCapturingGroups, 5675, "", "Regular expression cannot have more than 32,767 capturing groups", kjstRangeError, 0)
368368
RT_ERROR_MSG(JSERR_ProxyHandlerReturnedFalse, 5676, "Proxy %s handler returned false", "Proxy handler returned false", kjstTypeError, 0)
369-
RT_ERROR_MSG(JSERR_UnicodeRegExpRangeContainsCharClass, 5677, "", "Character classes not allowed in class ranges", kjstSyntaxError, 0)
369+
RT_ERROR_MSG(JSERR_UnicodeRegExpRangeContainsCharClass, 5677, "%s", "Character classes not allowed in a RegExp class range.", kjstSyntaxError, 0)
370370

371371
//Host errors
372372
RT_ERROR_MSG(JSERR_HostMaybeMissingPromiseContinuationCallback, 5700, "", "Host may not have set any promise continuation callback. Promises may not be executed.", kjstTypeError, 0)

test/Regex/characterclass_with_range.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ var tests = [
5858
matchRegExp("g", reUnicode, null);
5959
matchRegExp("\u2028", reUnicode, null);
6060
matchRegExp("\u2009", reUnicode, null);
61-
assert.throws(() => eval("/^[\\s-z]$/u.exec(\"-\")"), SyntaxError, "Expected an error due to character sets not being allowed in ranges when unicode flag is set.", "Character classes not allowed in class ranges");
61+
assert.throws(() => eval("/^[\\s-z]$/u.exec(\"-\")"), SyntaxError, "Expected an error due to character sets not being allowed in ranges when unicode flag is set.", "Character classes not allowed in a RegExp class range.");
62+
assert.throws(() => eval("/^[z-\\s]$/u.exec(\"-\")"), SyntaxError, "Expected an error due to character sets not being allowed in ranges when unicode flag is set.", "Character classes not allowed in a RegExp class range.");
63+
6264
}
6365
},
6466
{
@@ -95,6 +97,7 @@ var tests = [
9597
matchRegExp('--', re, '--');
9698
matchRegExp('-9', re, '-9');
9799
matchRegExp(' ', re, null);
100+
matchRegExp('-\\', re, null);
98101
}
99102
},
100103
{

0 commit comments

Comments
 (0)