Skip to content

Commit 4c26c3c

Browse files
author
Kenji Fukuda
committed
Comment fixes in js test and RegexParser, clarity changes
1 parent b15fa2c commit 4c26c3c

File tree

2 files changed

+36
-42
lines changed

2 files changed

+36
-42
lines changed

lib/Parser/RegexParser.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2167,7 +2167,7 @@ namespace UnifiedRegex
21672167
{
21682168
if (unicodeFlagPresent)
21692169
{
2170-
//We a range containing a character class and the unicode flag is present, thus we end up having to throw a "Syntax" error here
2170+
//The range contains a character class and the unicode flag is present, so we have to throw a "Syntax" error here
21712171
//This breaks the notion of Pass0 check for valid syntax, because during that time, the unicode flag is unknown.
21722172
Fail(JSERR_UnicodeRegExpRangeContainsCharClass); //From #sec-patterns-static-semantics-early-errors-annexb
21732173
}
@@ -2212,7 +2212,7 @@ namespace UnifiedRegex
22122212
{
22132213
if (prevprevWasACharSetAndPartOfRange)
22142214
{
2215-
//We a range containing a character class and the unicode flag is present, thus we end up having to throw a "Syntax" error here
2215+
//The range contains a character class; if the unicode flag is also present, we have to throw a "Syntax" error here
22162216
//This breaks the notion of Pass0 check for valid syntax, because during that time, the unicode flag is unknown.
22172217
if (unicodeFlagPresent)
22182218
{
@@ -2500,7 +2500,7 @@ namespace UnifiedRegex
25002500
}
25012501
else
25022502
{
2503-
DeferredFailIfUnicode(JSERR_RegExpInvalidEscape); // Fail in unicode mode for non-letter escaped control characters according to 262 Annex-B RegExp grammar SPEC #prod-annexB-Term
2503+
DeferredFailIfUnicode(JSERR_RegExpInvalidEscape); // Fail in unicode mode for non-letter escaped control characters according to 262 Annex-B RegExp grammar spec #prod-annexB-Term
25042504

25052505
if (!IsEOF())
25062506
{
@@ -2633,15 +2633,15 @@ namespace UnifiedRegex
26332633
standardChars->SetNonWordChars(ctAllocator, deferredSetNode->set);
26342634
return deferredSetNode;
26352635
case 'c':
2636-
if (standardEncodedChars->IsWord(ECLookahead())) // terminating 0 is not a word
2636+
if (standardEncodedChars->IsWord(ECLookahead())) // terminating 0 is not a word character
26372637
{
26382638
c = UTC(Chars<EncodedChar>::CTU(ECLookahead()) % 32);
26392639
ECConsume();
26402640
// fall-through for identity escape
26412641
}
26422642
else
26432643
{
2644-
// If the lookahead is a non-alphanumeric and not a dash('-'), then treat '\' and 'c' separately.
2644+
// If the lookahead is a non-alphanumeric and not an underscore ('_'), then treat '\' and 'c' separately.
26452645
//#sec-regular-expression-patterns-semantics
26462646
ECRevert(1); //Put cursor back at 'c' and treat it as a non-escaped character.
26472647
deferredCharNode->cs[0] = '\\';

test/Regex/control_character_escapes.js

Lines changed: 31 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -26,63 +26,57 @@ var tests = [
2626
name : "Control characters followed by a word character ([A-Za-z0-9_])",
2727
body : function ()
2828
{
29-
re = /[\c6]+/; //'6' = ascii x36
29+
re = /[\c6]+/; //'6' = ascii x36, parsed as [\x16]+
3030
matchRegExp("6", re, null);
3131
matchRegExp("\\", re, null);
3232
matchRegExp("\\c6", re, null);
3333
matchRegExp("c", re, null);
3434
matchRegExp("\x16", re, "\x16");
3535

36-
re = /\c6/; //'6' = ascii x36
36+
re = /\c6/; //'6' = ascii x36, parsed as "\\c6"
3737
matchRegExp("\\c6", re, "\\c6");
3838
matchRegExp("\\", re, null);
3939
matchRegExp("6", re, null);
4040
matchRegExp("c", re, null);
4141
matchRegExp("\x16", re, null);
4242

43-
re = /\c6[\c6]+/; //'6' = ascii x36
43+
re = /\c6[\c6]+/; //'6' = ascii x36, parsed as "\\c6"[\x16]+
4444
matchRegExp("\\c6\x16", re, "\\c6\x16");
4545
matchRegExp("\\", re, null);
4646
matchRegExp("c", re, null);
4747
matchRegExp("\x16", re, null);
4848

49-
re = /[\ca]+/; //'a' = ascii x61
49+
re = /[\ca]+/; //'a' = ascii x61, parsed as [\x01]+
5050
matchRegExp("a", re, null);
5151
matchRegExp("\\", re, null);
5252
matchRegExp("c", re, null);
5353
matchRegExp("00xyzabc123\x01qrst", re, "\x01");
5454

55-
re = /[\c_]+/; //'_' = ascii 0x5F
55+
re = /[\c_]+/; //'_' = ascii 0x5F, parsed as [\x1F]+
5656
matchRegExp("\x1F\x1F\x05", re, "\x1F\x1F");
5757
matchRegExp("\\\\\\", re, null);
5858
matchRegExp("////", re, null);
5959
matchRegExp("ccc_", re, null);
6060

61-
re = /[\cG]*/; //'G' = ascii x47
61+
re = /[\cG]*/; //'G' = ascii x47, parsed as [\x07]*
6262
matchRegExp("\x07\x06\x05", re, "\x07");
6363
matchRegExp("\\\\", re, "");
6464
matchRegExp("////", re, "");
6565
matchRegExp("cccG", re, "");
6666

67-
re = /\cG\cf/; //'G' = ascii x47, 'f' = ascii x66
68-
matchRegExp("\x00\x03\x07\x06\x07\x08", re, "\x07\x06");
69-
matchRegExp("\\", re, null);
70-
matchRegExp("/", re, null);
71-
matchRegExp("\\cG\\c6\\cf", re, null);
72-
73-
re = /[\cG\c6\cf]+/; //'G' = ascii x47, '6' = ascii x36, 'f' = ascii x66
67+
re = /[\cG\c6\cf]+/; //'G' = ascii x47, '6' = ascii x36, 'f' = ascii x66, parsed as [\x07\x16\x06]+
7468
matchRegExp("\x00\x03\x07\x06\x16\x07\x08", re, "\x07\x06\x16\x07");
7569
matchRegExp("\\\\", re, null);
7670
matchRegExp("////", re, null);
7771
matchRegExp("cfG6", re, null);
7872

79-
re = /\cG\cf/; //'G' = ascii x47, 'f' = ascii x66
73+
re = /\cG\cf/; //'G' = ascii x47, 'f' = ascii x66, parsed as "\x07\x06"
8074
matchRegExp("\x00\x03\x07\x06\x16\x07\x08", re, "\x07\x06");
8175
matchRegExp("\\", re, null);
8276
matchRegExp("/", re, null);
8377
matchRegExp("\\cG\\c6\\cf", re, null);
8478

85-
re = /[\cz\cZ]+/; //'z' = ascii x7A, 'Z' = ascii x5A, have the same lowest 5 bits
79+
re = /[\cz\cZ]+/; //'z' = ascii x7A, 'Z' = ascii x5A, have the same lowest 5 bits, parsed as [\x1A]+
8680
matchRegExp("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
8781
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", re, "\x1a");
8882
matchRegExp("\\\\", re, null);
@@ -94,28 +88,28 @@ var tests = [
9488
name : "Control characters followed by a non-word character ([^A-Za-z0-9_])",
9589
body : function ()
9690
{
97-
re = /[\c*]+/; //'*' = ascii 42
91+
re = /[\c*]+/; //'*' = ascii 42, parsed as [\\c*]+
9892
matchRegExp("\x0a\x09\x08", re, null);
9993
matchRegExp("a*c*b*d*", re, "*c*");
10094
matchRegExp("\\\\", re, "\\\\");
10195
matchRegExp("////", re, null);
10296
matchRegExp("ccc", re, "ccc");
10397

104-
re = /[\c}]*/; //'}' = ascii 125
98+
re = /[\c}]*/; //'}' = ascii 125, parsed as [\\c}]*
10599
matchRegExp("\x1d\x7d\x3d", re, "");
106100
matchRegExp("}c}}cd*c*b*d*", re, "}c}}c");
107101
matchRegExp("\\\\", re, "\\\\");
108102
matchRegExp("////", re, "");
109103
matchRegExp("ccc", re, "ccc");
110104

111-
re = /[\c;]+/; //';' = ascii 59
105+
re = /[\c;]+/; //';' = ascii 59, parsed as [\\c;]+
112106
matchRegExp("\x1b\x1c", re, null);
113107
matchRegExp("d;c;d;*", re, ";c;");
114108
matchRegExp("\\\\", re, "\\\\");
115109
matchRegExp("////", re, null);
116110
matchRegExp("ccc", re, "ccc");
117111

118-
re = /\c%/; //'%' = ascii x25
112+
re = /\c%/; //'%' = ascii x25, parsed as \\c%
119113
matchRegExp("\\", re, null);
120114
matchRegExp("\\", re, null);
121115
matchRegExp("\\c%", re, "\\c%");
@@ -126,67 +120,67 @@ var tests = [
126120
name : "Control Character tests with unicode flag present",
127121
body : function ()
128122
{
129-
re = /[\cAg]+/u; //'A' = ascii x41
123+
re = /[\cAg]+/u; //'A' = ascii x41, parsed as [g\x01]+
130124
matchRegExp("abcdefghi", re, "g");
131125
matchRegExp("\\\\", re, null);
132126
matchRegExp("////", re, null);
133127
matchRegExp("\x01\x01gg\x02\x04ggg", re, "\x01\x01gg");
134128

135-
re = /[\czA]+/u; //'z' = ascii x7A
129+
re = /[\czA]+/u; //'z' = ascii x7A, parsed as [\x1AA]+
136130
matchRegExp("abcdefghi", re, null);
137131
matchRegExp("\\\\", re, null);
138132
matchRegExp("////", re, null);
139133
matchRegExp("YZA\x1aABC", re, "A\x1aA");
140134

141-
assert.throws(() => eval("\"\".match(/[\\c]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present",
135+
assert.throws(() => eval("\"\".match(/[\\c]/u)"), SyntaxError, "(Character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by no character here.",
142136
"Invalid regular expression: invalid escape in unicode pattern");
143-
assert.throws(() => eval("\"\".match(/[\\c-d]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present",
137+
assert.throws(() => eval("\"\".match(/[\\c-d]/u)"), SyntaxError, "(Character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a dash, '-', here.",
144138
"Invalid regular expression: invalid escape in unicode pattern");
145-
assert.throws(() => eval("\"\".match(/[ab\\c_$]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present",
139+
assert.throws(() => eval("\"\".match(/[ab\\c_$]/u)"), SyntaxError, "(Character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by an underscore, '_', here.",
146140
"Invalid regular expression: invalid escape in unicode pattern");
147-
assert.throws(() => eval("\"\".match(/[ab\\c\\d]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present",
141+
assert.throws(() => eval("\"\".match(/[ab\\c\\d]/u)"), SyntaxError, "(Character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a backslash, '\\', here.",
148142
"Invalid regular expression: invalid escape in unicode pattern");
149-
assert.throws(() => eval("\"\".match(/[ab\\c3]/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present",
143+
assert.throws(() => eval("\"\".match(/[ab\\c3]/u)"), SyntaxError, "(Character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a number, '3', here.",
150144
"Invalid regular expression: invalid escape in unicode pattern");
151145

152-
re = /\cAg/u; //'A' = ascii x41
146+
re = /\cAg/u; //'A' = ascii x41, parsed as "\x01g"
153147
matchRegExp("abcdefghi", re, null);
154148
matchRegExp("\\\\", re, null);
155149
matchRegExp("////", re, null);
156150
matchRegExp("\x01\x01gg\x02\x04ggg", re, "\x01g");
157151

158-
re = /\czA/u; //'z' = ascii x7A
152+
re = /\czA/u; //'z' = ascii x7A, parsed as "\x1aA"
159153
matchRegExp("abcdefghi", re, null);
160154
matchRegExp("\\\\", re, null);
161155
matchRegExp("////", re, null);
162156
matchRegExp("YZA\x1aABC", re, "\x1aA");
163157

164-
assert.throws(() => eval("\"\".match(/\\c/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present",
158+
assert.throws(() => eval("\"\".match(/\\c/u)"), SyntaxError, "(Non-character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by no character here.",
165159
"Invalid regular expression: invalid escape in unicode pattern");
166-
assert.throws(() => eval("\"\".match(/\\c-d/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present",
160+
assert.throws(() => eval("\"\".match(/\\c-d/u)"), SyntaxError, "(Non-character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a dash, '-', here.",
167161
"Invalid regular expression: invalid escape in unicode pattern");
168-
assert.throws(() => eval("\"\".match(/ab\\c_$/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present",
162+
assert.throws(() => eval("\"\".match(/ab\\c_$/u)"), SyntaxError, "(Non-character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by an underscore, '_', here.",
169163
"Invalid regular expression: invalid escape in unicode pattern");
170-
assert.throws(() => eval("\"\".match(/ab\\c\\d/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present",
164+
assert.throws(() => eval("\"\".match(/ab\\c\\d/u)"), SyntaxError, "(Non-character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a backslash, '\\', here.",
171165
"Invalid regular expression: invalid escape in unicode pattern");
172-
assert.throws(() => eval("\"\".match(/ab\\c3/u)"), SyntaxError, "Expected an error due to non-letters being disallowed from control character when unicode flag present",
166+
assert.throws(() => eval("\"\".match(/ab\\c3/u)"), SyntaxError, "(Non-character class) Expected an error because escaped c must be followed by a letter when unicode flag is present, but is followed by a number, '3', here.",
173167
"Invalid regular expression: invalid escape in unicode pattern");
174168
}
175169
},
176170
{
177171
name : "Control character edge cases",
178172
body : function ()
179173
{
180-
re = /[\c-g]+/; //'-' = ascii x2D
174+
re = /[\c-g]+/; //'-' = ascii x2D, parsed as [\\c-g]+
181175
matchRegExp("abcdefghi", re, "cdefg");
182176
matchRegExp("\\\\", re, "\\\\");
183177
matchRegExp("////", re, null);
184178
matchRegExp("\x0d", re, null);
185179
matchRegExp("aobd\\f\\d", re, "d\\f\\d");
186180

187-
re = /[\c-]+/; //'-' = ascii x2D
181+
re = /[\c-]+/; //'-' = ascii x2D, parsed as [\\c-]+
188182
matchRegExp("abcdefghi", re, "c");
189-
matchRegExp("\x0d", re, null);
183+
matchRegExp("\x0dc--c", re, "c--c");
190184
matchRegExp("\\\\", re, "\\\\");
191185
matchRegExp("////", re, null);
192186
matchRegExp("aobd\\f\\d", re, "\\");
@@ -198,4 +192,4 @@ var tests = [
198192

199193
testRunner.runTests(tests, {
200194
verbose : WScript.Arguments[0] != "summary"
201-
});
195+
});

0 commit comments

Comments
 (0)