Add missing end-of-string checks to RegExp parser in unicode mode (#3875)

dbatyai · web-flow · commit fed1b0c8270f · 2020-06-08T11:09:08.000+02:00
Fixes #3870. Fixes #3871. JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai@inf.u-szeged.hu
diff --git a/jerry-core/parser/regexp/re-parser.c b/jerry-core/parser/regexp/re-parser.c
@@ -612,30 +612,28 @@ re_parse_char_escape (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context
 #if ENABLED (JERRY_ES2015)
       if (re_ctx_p->flags & RE_FLAG_UNICODE)
       {
-        if (*re_ctx_p->input_curr_p == LIT_CHAR_LEFT_BRACE)
+        if (re_ctx_p->input_curr_p + 1 < re_ctx_p->input_end_p
+            && re_ctx_p->input_curr_p[0] == LIT_CHAR_LEFT_BRACE
+            && lit_char_is_hex_digit (re_ctx_p->input_curr_p[1]))
         {
-          re_ctx_p->input_curr_p++;
+          lit_code_point_t cp = lit_char_hex_to_int (re_ctx_p->input_curr_p[1]);
+          re_ctx_p->input_curr_p += 2;
 
-          if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && lit_char_is_hex_digit (*re_ctx_p->input_curr_p))
+          while (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && lit_char_is_hex_digit (*re_ctx_p->input_curr_p))
           {
-            lit_code_point_t cp = lit_char_hex_to_int (*re_ctx_p->input_curr_p++);
+            cp = cp * 16 + lit_char_hex_to_int (*re_ctx_p->input_curr_p++);
 
-            while (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && lit_char_is_hex_digit (*re_ctx_p->input_curr_p))
+            if (JERRY_UNLIKELY (cp > LIT_UNICODE_CODE_POINT_MAX))
             {
-              cp = cp * 16 + lit_char_hex_to_int (*re_ctx_p->input_curr_p++);
-
-              if (JERRY_UNLIKELY (cp > LIT_UNICODE_CODE_POINT_MAX))
-              {
-                return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence"));
-              }
+              return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence"));
             }
+          }
 
-            if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && *re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_BRACE)
-            {
-              re_ctx_p->input_curr_p++;
-              re_ctx_p->token.value = cp;
-              break;
-            }
+          if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && *re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_BRACE)
+          {
+            re_ctx_p->input_curr_p++;
+            re_ctx_p->token.value = cp;
+            break;
           }
         }
 
@@ -867,7 +865,8 @@ re_parse_next_token (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context *
 
 #if ENABLED (JERRY_ES2015)
       if (re_ctx_p->flags & RE_FLAG_UNICODE
-          && lit_is_code_point_utf16_high_surrogate (ch))
+          && lit_is_code_point_utf16_high_surrogate (ch)
+          && re_ctx_p->input_curr_p < re_ctx_p->input_end_p)
       {
         const ecma_char_t next = lit_cesu8_peek_next (re_ctx_p->input_curr_p);
         if (lit_is_code_point_utf16_low_surrogate (next))
diff --git a/tests/jerry/es2015/regression-test-issue-3870.js b/tests/jerry/es2015/regression-test-issue-3870.js
@@ -0,0 +1,15 @@
+// Copyright JS Foundation and other contributors, http://js.foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+assert (new RegExp("\ud800", "u").exec("\ud800")[0] === "\ud800");
diff --git a/tests/jerry/es2015/regression-test-issue-3871.js b/tests/jerry/es2015/regression-test-issue-3871.js
@@ -0,0 +1,20 @@
+// Copyright JS Foundation and other contributors, http://js.foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+try {
+  new RegExp('"\\u', 'u');
+  assert (false);
+} catch (e) {
+  assert (e instanceof SyntaxError);
+}