Skip to content

Commit 4a7231a

Browse files
committed
Implement RegExp dotAll flag
1 parent 45a6271 commit 4a7231a

28 files changed

+332
-32
lines changed

lib/Common/ConfigFlagsList.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,7 @@ PHASE(All)
640640
#define DEFAULT_CONFIG_ES6UnicodeVerbose (true)
641641
#define DEFAULT_CONFIG_ES6Unscopables (true)
642642
#define DEFAULT_CONFIG_ES6RegExSticky (true)
643+
#define DEFAULT_CONFIG_ES2018RegExDotAll (true)
643644
#ifdef COMPILE_DISABLE_ES6RegExPrototypeProperties
644645
// If ES6RegExPrototypeProperties needs to be disabled by compile flag, DEFAULT_CONFIG_ES6RegExPrototypeProperties should be false
645646
#define DEFAULT_CONFIG_ES6RegExPrototypeProperties (false)
@@ -1135,6 +1136,7 @@ FLAGPR (Boolean, ES6, ES6Unicode , "Enable ES6 Unicode 6.0
11351136
FLAGPR (Boolean, ES6, ES6UnicodeVerbose , "Enable ES6 Unicode 6.0 verbose failure output" , DEFAULT_CONFIG_ES6UnicodeVerbose)
11361137
FLAGPR (Boolean, ES6, ES6Unscopables , "Enable ES6 With Statement Unscopables" , DEFAULT_CONFIG_ES6Unscopables)
11371138
FLAGPR (Boolean, ES6, ES6RegExSticky , "Enable ES6 RegEx sticky flag" , DEFAULT_CONFIG_ES6RegExSticky)
1139+
FLAGPR (Boolean, ES6, ES2018RegExDotAll , "Enable ES2018 RegEx dotAll flag" , DEFAULT_CONFIG_ES2018RegExDotAll)
11381140

11391141
#ifndef COMPILE_DISABLE_ES6RegExPrototypeProperties
11401142
#define COMPILE_DISABLE_ES6RegExPrototypeProperties 0

lib/Parser/RegexFlags.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ namespace UnifiedRegex
1414
MultilineRegexFlag = 1 << 2,
1515
UnicodeRegexFlag = 1 << 3,
1616
StickyRegexFlag = 1 << 4,
17-
AllRegexFlags = (1 << 5) - 1
17+
DotAllRegexFlag = 1 << 5,
18+
AllRegexFlags = (1 << 6) - 1
1819
};
1920
}

lib/Parser/RegexParser.cpp

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ namespace UnifiedRegex
145145
, tempLocationOfRange(nullptr)
146146
, codePointAtTempLocation(0)
147147
, unicodeFlagPresent(false)
148+
, dotAllFlagPresent(false)
148149
, caseInsensitiveFlagPresent(false)
149150
, positionAfterLastSurrogate(nullptr)
150151
, valueOfLastSurrogate(INVALID_CODEPOINT)
@@ -2758,6 +2759,16 @@ namespace UnifiedRegex
27582759
}
27592760
flags = (RegexFlags)(flags | MultilineRegexFlag);
27602761
break;
2762+
case 's':
2763+
if (scriptContext->GetConfig()->IsES2018RegExDotAllEnabled())
2764+
{
2765+
if ((flags & DotAllRegexFlag) != 0)
2766+
{
2767+
Fail(JSERR_RegExpSyntax);
2768+
}
2769+
flags = (RegexFlags)(flags | DotAllRegexFlag);
2770+
break;
2771+
}
27612772
case 'u':
27622773
// If we don't have unicode enabled, fall through to default
27632774
if (scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled())
@@ -2832,12 +2843,15 @@ namespace UnifiedRegex
28322843
Fail(JSERR_RegExpSyntax);
28332844
this->unicodeFlagPresent = (flags & UnifiedRegex::UnicodeRegexFlag) == UnifiedRegex::UnicodeRegexFlag;
28342845
this->caseInsensitiveFlagPresent = (flags & UnifiedRegex::IgnoreCaseRegexFlag) == UnifiedRegex::IgnoreCaseRegexFlag;
2846+
this->dotAllFlagPresent = (flags & UnifiedRegex::DotAllRegexFlag) == UnifiedRegex::DotAllRegexFlag;
28352847
Assert(!this->unicodeFlagPresent || scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled());
2848+
Assert(!this->dotAllFlagPresent || scriptContext->GetConfig()->IsES2018RegExDotAllEnabled());
28362849
}
28372850
else
28382851
{
28392852
this->unicodeFlagPresent = false;
28402853
this->caseInsensitiveFlagPresent = false;
2854+
this->dotAllFlagPresent = false;
28412855
}
28422856

28432857
// If this HR has been set, that means we have an earlier failure than the one caught above.
@@ -2891,6 +2905,7 @@ namespace UnifiedRegex
28912905
Options(flags);
28922906
this->unicodeFlagPresent = (flags & UnifiedRegex::UnicodeRegexFlag) == UnifiedRegex::UnicodeRegexFlag;
28932907
this->caseInsensitiveFlagPresent = (flags & UnifiedRegex::IgnoreCaseRegexFlag) == UnifiedRegex::IgnoreCaseRegexFlag;
2908+
this->dotAllFlagPresent = (flags & UnifiedRegex::DotAllRegexFlag) == UnifiedRegex::DotAllRegexFlag;
28942909
Assert(!this->unicodeFlagPresent || scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled());
28952910

28962911
// If this HR has been set, that means we have an earlier failure than the one caught above.
@@ -2946,6 +2961,7 @@ namespace UnifiedRegex
29462961
Options(dummyFlags);
29472962
this->unicodeFlagPresent = (dummyFlags & UnifiedRegex::UnicodeRegexFlag) == UnifiedRegex::UnicodeRegexFlag;
29482963
this->caseInsensitiveFlagPresent = (dummyFlags & UnifiedRegex::IgnoreCaseRegexFlag) == UnifiedRegex::IgnoreCaseRegexFlag;
2964+
this->dotAllFlagPresent = (dummyFlags & UnifiedRegex::DotAllRegexFlag) == UnifiedRegex::DotAllRegexFlag;
29492965
outTotalEncodedChars = Chars<EncodedChar>::OSB(next, input);
29502966
outTotalChars = Pos();
29512967

@@ -3101,7 +3117,14 @@ namespace UnifiedRegex
31013117
switch (cc)
31023118
{
31033119
case '.':
3104-
standardChars->SetNonNewline(ctAllocator, partialPrefixSetNode->set);
3120+
if (this->dotAllFlagPresent)
3121+
{
3122+
standardChars->SetFullSet(ctAllocator, partialPrefixSetNode->set);
3123+
}
3124+
else
3125+
{
3126+
standardChars->SetNonNewline(ctAllocator, partialPrefixSetNode->set);
3127+
}
31053128
break;
31063129
case 'S':
31073130
standardChars->SetNonWhitespace(ctAllocator, partialPrefixSetNode->set);
@@ -3137,7 +3160,14 @@ namespace UnifiedRegex
31373160
switch (cc)
31383161
{
31393162
case '.':
3140-
standardChars->SetNonNewline(ctAllocator, setNode->set);
3163+
if (this->dotAllFlagPresent)
3164+
{
3165+
standardChars->SetFullSet(ctAllocator, setNode->set);
3166+
}
3167+
else
3168+
{
3169+
standardChars->SetNonNewline(ctAllocator, setNode->set);
3170+
}
31413171
break;
31423172
case 'S':
31433173
standardChars->SetNonWhitespace(ctAllocator, setNode->set);

lib/Parser/RegexParser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ namespace UnifiedRegex
107107
SurrogatePairTracker* currentSurrogatePairNode;
108108
bool unicodeFlagPresent;
109109
bool caseInsensitiveFlagPresent;
110+
bool dotAllFlagPresent;
110111

111112
// The following two variables are used to determine if the surrogate pair has been encountered
112113
// First holds the temporary location, second holds the value of the codepoint

lib/Parser/RegexPattern.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ namespace UnifiedRegex
9393
return (rep.unified.program->flags & IgnoreCaseRegexFlag) != 0;
9494
}
9595

96+
bool RegexPattern::IsDotAll() const
97+
{
98+
return GetScriptContext()->GetConfig()->IsES2018RegExDotAllEnabled() && (rep.unified.program->flags & DotAllRegexFlag) != 0;
99+
}
100+
96101
bool RegexPattern::IsGlobal() const
97102
{
98103
return (rep.unified.program->flags & GlobalRegexFlag) != 0;
@@ -195,6 +200,8 @@ namespace UnifiedRegex
195200
w->Print(_u("g"));
196201
if (IsMultiline())
197202
w->Print(_u("m"));
203+
if (IsDotAll())
204+
w->Print(_u("s"));
198205
if (IsUnicode())
199206
w->Print(_u("u"));
200207
if (IsSticky())

lib/Parser/RegexPattern.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ namespace UnifiedRegex
6262
bool IsIgnoreCase() const;
6363
bool IsGlobal() const;
6464
bool IsMultiline() const;
65+
bool IsDotAll() const;
6566
bool IsUnicode() const;
6667
bool IsSticky() const;
6768
bool WasLastMatchSuccessful() const;

lib/Parser/RegexRuntime.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5755,9 +5755,10 @@ namespace UnifiedRegex
57555755
w->Print(_u("flags: "));
57565756
if ((flags & GlobalRegexFlag) != 0) w->Print(_u("global "));
57575757
if ((flags & MultilineRegexFlag) != 0) w->Print(_u("multiline "));
5758-
if ((flags & IgnoreCaseRegexFlag) != 0) w->Print(_u("ignorecase"));
5759-
if ((flags & UnicodeRegexFlag) != 0) w->Print(_u("unicode"));
5760-
if ((flags & StickyRegexFlag) != 0) w->Print(_u("sticky"));
5758+
if ((flags & IgnoreCaseRegexFlag) != 0) w->Print(_u("ignorecase "));
5759+
if ((flags & DotAllRegexFlag) != 0) w->Print(_u("dotAll "));
5760+
if ((flags & UnicodeRegexFlag) != 0) w->Print(_u("unicode "));
5761+
if ((flags & StickyRegexFlag) != 0) w->Print(_u("sticky "));
57615762
w->EOL();
57625763
w->PrintEOL(_u("numGroups: %d"), numGroups);
57635764
w->PrintEOL(_u("numLoops: %d"), numLoops);

lib/Parser/StandardChars.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,11 @@ END {
261261
set.SetNotRanges(setAllocator, numNewlinePairs, newlineStr);
262262
}
263263

264+
// Initializes `set` via SetNotRanges with an empty range list, i.e. nothing
// is excluded from the set. Used for '.' when the dotAll flag is present.
//
// setAllocator: arena handed in by the caller for building `set`.
// set:          the character set to fill.
void StandardChars<char16>::SetFullSet(ArenaAllocator* setAllocator, CharSet<Char> &set)
{
    // Build the set with the allocator the caller passed in, the same way
    // SetNonNewline does, rather than with any other allocator in scope.
    set.SetNotRanges(setAllocator, 0, nullptr);
}
268+
264269
CharSet<char16>* StandardChars<char16>::GetFullSet()
265270
{
266271
if (fullSet == 0)

lib/Parser/StandardChars.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ namespace UnifiedRegex
323323
void SetNonWordIUChars(ArenaAllocator* setAllocator, CharSet<Char> &set);
324324
void SetNewline(ArenaAllocator* setAllocator, CharSet<Char> &set);
325325
void SetNonNewline(ArenaAllocator* setAllocator, CharSet<Char> &set);
326+
void SetFullSet(ArenaAllocator* setAllocator, CharSet<Char> &set);
326327

327328
CharSet<Char>* GetFullSet();
328329
CharSet<Char>* GetEmptySet();

lib/Runtime/Base/JnDirectFields.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,7 @@ ENTRY(compile)
422422
ENTRY(global)
423423
ENTRY(lastIndex)
424424
ENTRY(multiline)
425+
ENTRY(dotAll)
425426
ENTRY(ignoreCase)
426427
ENTRY(unicode)
427428
ENTRY(sticky)

0 commit comments

Comments
 (0)