Assertion when scanning invalid numeric literal after multi-unit characters

boingoing · boingoing · commit 853696860ee3 · 2018-08-15T13:49:15.000-07:00
We can throw an error while scanning a numeric literal. If the numeric literal is preceded by multi-unit whitespace (eaten as part of the numeric literal token), we count those multi-units in Scanner::m_cMultiUnits. However, if scanning the numeric literal ends up throwing, we should reset the scanner's multi-unit counter to whatever it was before we started scanning the whitespace and numeric literal token. If we don't reset the multi-unit counter, the character offset for the error object will be wrong and debug builds will assert. Fixes #5571
diff --git a/lib/Parser/Scan.cpp b/lib/Parser/Scan.cpp
@@ -587,7 +587,7 @@ IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedChar
 }
 
 template <typename EncodingPolicy>
-typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt)
+typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt, size_t saveMultiUnits)
 {
     EncodedCharPtr last = m_pchLast;
     EncodedCharPtr pchT = nullptr;
@@ -686,6 +686,7 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN
     }
     if (this->charClassifier->IsIdStart(outChar))
     {
+        this->RestoreMultiUnits(saveMultiUnits);
         Error(ERRIdAfterLit);
     }
 
@@ -695,12 +696,14 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN
         startingLocation++; // TryReadEscape expects us to point to the 'u', and since it is by reference we need to do it beforehand.
         if (TryReadEscape(startingLocation, m_pchLast, &outChar))
         {
+            this->RestoreMultiUnits(saveMultiUnits);
             Error(ERRIdAfterLit);
         }
     }
 
     if (Js::NumberUtilities::IsDigit(*startingLocation))
     {
+        this->RestoreMultiUnits(saveMultiUnits);
         Error(ERRbadNumber);
     }
 
@@ -1616,6 +1619,8 @@ tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)
         }
     }
 
+    size_t saveMultiUnits = this->m_cMultiUnits;
+
     for (;;)
     {
 LLoop:
@@ -1720,9 +1725,10 @@ tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)
                 p = m_pchMinTok;
                 this->RestoreMultiUnits(m_cMinTokMultiUnits);
                 bool likelyInt = true;
-                pchT = FScanNumber(p, &dbl, likelyInt);
+                pchT = FScanNumber(p, &dbl, likelyInt, saveMultiUnits);
                 if (p == pchT)
                 {
+                    this->RestoreMultiUnits(saveMultiUnits);
                     Assert(this->PeekFirst(p, last) != '.');
                     Error(ERRbadNumber);
                 }
diff --git a/lib/Parser/Scan.h b/lib/Parser/Scan.h
@@ -500,7 +500,6 @@ class Scanner : public IScanner, public EncodingPolicy
     // character of the token would have if the entire file was converted to Unicode (UTF16-LE).
     charcount_t IchLimTok(void) const
     {
-
         Assert(m_currentCharacter - m_pchBase >= 0);
         Assert(m_currentCharacter - m_pchBase <= LONG_MAX);
         Assert(static_cast<charcount_t>(m_currentCharacter - m_pchBase) >= this->m_cMultiUnits);
@@ -788,7 +787,7 @@ class Scanner : public IScanner, public EncodingPolicy
     tokens SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef);
     tokens ScanRegExpConstant(ArenaAllocator* alloc);
     tokens ScanRegExpConstantNoAST(ArenaAllocator* alloc);
-    EncodedCharPtr FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt);
+    EncodedCharPtr FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt, size_t saveMultiUnits);
     IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar);
     IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last);
     uint32 UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last);
diff --git a/test/Scanner/NumericLiteralSuffix.js b/test/Scanner/NumericLiteralSuffix.js
@@ -82,6 +82,14 @@ var tests = [
             eval("\u2028var\u2028x\u2028=\u20281234\u2028; result = x;");
             assert.areEqual(1234, result, "Mutli-unit whitespace after numeric literal does not affect literal value");
         }
+    },
+    {
+        name: "Multi-unit count updated in the middle of a token",
+        body: function () {
+            assert.throws(() => eval('\u20091a'), SyntaxError, 'Multi-unit whitespace followed by numeric literal followed by identifier', 'Unexpected identifier after numeric literal');
+            assert.throws(() => eval('\u20091\\u0065'), SyntaxError, 'Multi-unit whitespace followed by numeric literal followed by unicode escape sequence', 'Unexpected identifier after numeric literal');
+            assert.throws(() => eval('\u20090o1239'), SyntaxError, 'Multi-unit whitespace followed by invalid octal numeric literal', 'Invalid number');
+        }
     }
 ];
 

Original file line number	Diff line number	Diff line change
`@@ -587,7 +587,7 @@ IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedChar`
`587`	`587`	`}`
`588`	`588`
`589`	`589`	`template <typename EncodingPolicy>`
`590`		`-typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt)`
	`590`	`+typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt, size_t saveMultiUnits)`
`591`	`591`	`{`
`592`	`592`	`EncodedCharPtr last = m_pchLast;`
`593`	`593`	`EncodedCharPtr pchT = nullptr;`
`@@ -686,6 +686,7 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN`
`686`	`686`	`}`
`687`	`687`	`if (this->charClassifier->IsIdStart(outChar))`
`688`	`688`	`{`
	`689`	`+ this->RestoreMultiUnits(saveMultiUnits);`
`689`	`690`	`Error(ERRIdAfterLit);`
`690`	`691`	`}`
`691`	`692`
`@@ -695,12 +696,14 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN`
`695`	`696`	`startingLocation++; // TryReadEscape expects us to point to the 'u', and since it is by reference we need to do it beforehand.`
`696`	`697`	`if (TryReadEscape(startingLocation, m_pchLast, &outChar))`
`697`	`698`	`{`
	`699`	`+ this->RestoreMultiUnits(saveMultiUnits);`
`698`	`700`	`Error(ERRIdAfterLit);`
`699`	`701`	`}`
`700`	`702`	`}`
`701`	`703`
`702`	`704`	`if (Js::NumberUtilities::IsDigit(*startingLocation))`
`703`	`705`	`{`
	`706`	`+ this->RestoreMultiUnits(saveMultiUnits);`
`704`	`707`	`Error(ERRbadNumber);`
`705`	`708`	`}`
`706`	`709`
`@@ -1616,6 +1619,8 @@ tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)`
`1616`	`1619`	`}`
`1617`	`1620`	`}`
`1618`	`1621`
	`1622`	`+ size_t saveMultiUnits = this->m_cMultiUnits;`
	`1623`	`+`
`1619`	`1624`	`for (;;)`
`1620`	`1625`	`{`
`1621`	`1626`	`LLoop:`
`@@ -1720,9 +1725,10 @@ tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)`
`1720`	`1725`	`p = m_pchMinTok;`
`1721`	`1726`	`this->RestoreMultiUnits(m_cMinTokMultiUnits);`
`1722`	`1727`	`bool likelyInt = true;`
`1723`		`- pchT = FScanNumber(p, &dbl, likelyInt);`
	`1728`	`+ pchT = FScanNumber(p, &dbl, likelyInt, saveMultiUnits);`
`1724`	`1729`	`if (p == pchT)`
`1725`	`1730`	`{`
	`1731`	`+ this->RestoreMultiUnits(saveMultiUnits);`
`1726`	`1732`	`Assert(this->PeekFirst(p, last) != '.');`
`1727`	`1733`	`Error(ERRbadNumber);`
`1728`	`1734`	`}`
Original file line number	Diff line number	Diff line change
`@@ -82,6 +82,14 @@ var tests = [`
`82`	`82`	`eval("\u2028var\u2028x\u2028=\u20281234\u2028; result = x;");`
`83`	`83`	`assert.areEqual(1234, result, "Mutli-unit whitespace after numeric literal does not affect literal value");`
`84`	`84`	`}`
	`85`	`+ },`
	`86`	`+ {`
	`87`	`+ name: "Multi-unit count updated in the middle of a token",`
	`88`	`+ body: function () {`
	`89`	`+ assert.throws(() => eval('\u20091a'), SyntaxError, 'Multi-unit whitespace followed by numeric literal followed by identifier', 'Unexpected identifier after numeric literal');`
	`90`	`+ assert.throws(() => eval('\u20091\\u0065'), SyntaxError, 'Multi-unit whitespace followed by numeric literal followed by unicode escape sequence', 'Unexpected identifier after numeric literal');`
	`91`	`+ assert.throws(() => eval('\u20090o1239'), SyntaxError, 'Multi-unit whitespace followed by invalid octal numeric literal', 'Invalid number');`
	`92`	`+ }`
`85`	`93`	`}`
`86`	`94`	`];`
`87`	`95`