[MERGE #5601 @boingoing] Assertion when scanning invalid numeric literal after multi-unit characters

boingoing · boingoing · commit 19738b7e42b4 · 2018-08-15T16:56:13.000-07:00
Merge pull request #5601 from boingoing:multiunit_assert We can throw an error when scanning numeric literals. If the numeric literal is preceded by multi-unit whitespace (eaten as part of the numeric literal token), we will count those multi-units into Scanner::m_cMultiUnits. However, if scanning the numeric literal ends up throwing, we should reset the multi-unit counter in the scanner to whatever it was before we started to scan the whitespace and numeric literal token. If we don't reset the multi-unit counter, the character offset for the error object will be wrong and debug builds will assert. Fixes #5571
diff --git a/lib/Parser/Scan.cpp b/lib/Parser/Scan.cpp
@@ -587,7 +587,7 @@ IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedChar
 }
 
 template <typename EncodingPolicy>
-typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt)
+typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt, size_t savedMultiUnits)
 {
     EncodedCharPtr last = m_pchLast;
     EncodedCharPtr pchT = nullptr;
@@ -686,6 +686,7 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN
     }
     if (this->charClassifier->IsIdStart(outChar))
     {
+        this->RestoreMultiUnits(savedMultiUnits);
         Error(ERRIdAfterLit);
     }
 
@@ -695,12 +696,14 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN
         startingLocation++; // TryReadEscape expects us to point to the 'u', and since it is by reference we need to do it beforehand.
         if (TryReadEscape(startingLocation, m_pchLast, &outChar))
         {
+            this->RestoreMultiUnits(savedMultiUnits);
             Error(ERRIdAfterLit);
         }
     }
 
     if (Js::NumberUtilities::IsDigit(*startingLocation))
     {
+        this->RestoreMultiUnits(savedMultiUnits);
         Error(ERRbadNumber);
     }
 
@@ -1587,6 +1590,7 @@ tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)
     m_tkPrevious = m_ptoken->tk;
     m_iecpLimTokPrevious = IecpLimTok();    // Introduced for use by lambda parsing to find correct span of expression lambdas
     m_ichLimTokPrevious = IchLimTok();
+    size_t savedMultiUnits = this->m_cMultiUnits;
 
     if (p >= last)
     {
@@ -1720,9 +1724,10 @@ tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)
                 p = m_pchMinTok;
                 this->RestoreMultiUnits(m_cMinTokMultiUnits);
                 bool likelyInt = true;
-                pchT = FScanNumber(p, &dbl, likelyInt);
+                pchT = FScanNumber(p, &dbl, likelyInt, savedMultiUnits);
                 if (p == pchT)
                 {
+                    this->RestoreMultiUnits(savedMultiUnits);
                     Assert(this->PeekFirst(p, last) != '.');
                     Error(ERRbadNumber);
                 }
diff --git a/lib/Parser/Scan.h b/lib/Parser/Scan.h
@@ -500,7 +500,6 @@ class Scanner : public IScanner, public EncodingPolicy
     // character of the token would have if the entire file was converted to Unicode (UTF16-LE).
     charcount_t IchLimTok(void) const
     {
-
         Assert(m_currentCharacter - m_pchBase >= 0);
         Assert(m_currentCharacter - m_pchBase <= LONG_MAX);
         Assert(static_cast<charcount_t>(m_currentCharacter - m_pchBase) >= this->m_cMultiUnits);
@@ -788,7 +787,7 @@ class Scanner : public IScanner, public EncodingPolicy
     tokens SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef);
     tokens ScanRegExpConstant(ArenaAllocator* alloc);
     tokens ScanRegExpConstantNoAST(ArenaAllocator* alloc);
-    EncodedCharPtr FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt);
+    EncodedCharPtr FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt, size_t savedMultiUnits);
     IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar);
     IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last);
     uint32 UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last);
diff --git a/test/Scanner/NumericLiteralSuffix.js b/test/Scanner/NumericLiteralSuffix.js
@@ -82,6 +82,16 @@ var tests = [
             eval("\u2028var\u2028x\u2028=\u20281234\u2028; result = x;");
             assert.areEqual(1234, result, "Mutli-unit whitespace after numeric literal does not affect literal value");
         }
+    },
+    {
+        name: "Multi-unit count updated in the middle of a token",
+        body: function () {
+            if (WScript.Platform.INTL_LIBRARY === "winglob" || WScript.Platform.INTL_LIBRARY === "icu") {
+                assert.throws(() => eval('\u20091a'), SyntaxError, 'Multi-unit whitespace followed by numeric literal followed by identifier', 'Unexpected identifier after numeric literal');
+                assert.throws(() => eval('\u20091\\u0065'), SyntaxError, 'Multi-unit whitespace followed by numeric literal followed by unicode escape sequence', 'Unexpected identifier after numeric literal');
+                assert.throws(() => eval('\u20090o1239'), SyntaxError, 'Multi-unit whitespace followed by invalid octal numeric literal', 'Invalid number');
+            }
+        }
     }
 ];
 

Original file line number	Diff line number	Diff line change
`@@ -587,7 +587,7 @@ IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedChar`
`587`	`587`	`}`
`588`	`588`
`589`	`589`	`template <typename EncodingPolicy>`
`590`		`-typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt)`
	`590`	`+typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt, size_t savedMultiUnits)`
`591`	`591`	`{`
`592`	`592`	`EncodedCharPtr last = m_pchLast;`
`593`	`593`	`EncodedCharPtr pchT = nullptr;`
`@@ -686,6 +686,7 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN`
`686`	`686`	`}`
`687`	`687`	`if (this->charClassifier->IsIdStart(outChar))`
`688`	`688`	`{`
	`689`	`+ this->RestoreMultiUnits(savedMultiUnits);`
`689`	`690`	`Error(ERRIdAfterLit);`
`690`	`691`	`}`
`691`	`692`
`@@ -695,12 +696,14 @@ typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanN`
`695`	`696`	`startingLocation++; // TryReadEscape expects us to point to the 'u', and since it is by reference we need to do it beforehand.`
`696`	`697`	`if (TryReadEscape(startingLocation, m_pchLast, &outChar))`
`697`	`698`	`{`
	`699`	`+ this->RestoreMultiUnits(savedMultiUnits);`
`698`	`700`	`Error(ERRIdAfterLit);`
`699`	`701`	`}`
`700`	`702`	`}`
`701`	`703`
`702`	`704`	`if (Js::NumberUtilities::IsDigit(*startingLocation))`
`703`	`705`	`{`
	`706`	`+ this->RestoreMultiUnits(savedMultiUnits);`
`704`	`707`	`Error(ERRbadNumber);`
`705`	`708`	`}`
`706`	`709`
`@@ -1587,6 +1590,7 @@ tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)`
`1587`	`1590`	`m_tkPrevious = m_ptoken->tk;`
`1588`	`1591`	`m_iecpLimTokPrevious = IecpLimTok(); // Introduced for use by lambda parsing to find correct span of expression lambdas`
`1589`	`1592`	`m_ichLimTokPrevious = IchLimTok();`
	`1593`	`+ size_t savedMultiUnits = this->m_cMultiUnits;`
`1590`	`1594`
`1591`	`1595`	`if (p >= last)`
`1592`	`1596`	`{`
`@@ -1720,9 +1724,10 @@ tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)`
`1720`	`1724`	`p = m_pchMinTok;`
`1721`	`1725`	`this->RestoreMultiUnits(m_cMinTokMultiUnits);`
`1722`	`1726`	`bool likelyInt = true;`
`1723`		`- pchT = FScanNumber(p, &dbl, likelyInt);`
	`1727`	`+ pchT = FScanNumber(p, &dbl, likelyInt, savedMultiUnits);`
`1724`	`1728`	`if (p == pchT)`
`1725`	`1729`	`{`
	`1730`	`+ this->RestoreMultiUnits(savedMultiUnits);`
`1726`	`1731`	`Assert(this->PeekFirst(p, last) != '.');`
`1727`	`1732`	`Error(ERRbadNumber);`
`1728`	`1733`	`}`