From 82dece19c7b7132d5e7ab480ddf3d1e6d59e301e Mon Sep 17 00:00:00 2001
From: MayankRaj435 <1bi22ri032@bit-bangalore.edu.in>
Date: Mon, 16 Mar 2026 12:19:22 +0530
Subject: [PATCH] Refactor: Parse Unicode and hex escapes in lexer without intermediate buffers

---
 core/parser/src/lexer/cursor.rs |  21 +-----
 core/parser/src/lexer/string.rs | 122 ++++++++++++++------------------
 2 files changed, 53 insertions(+), 90 deletions(-)
diff --git a/core/parser/src/lexer/cursor.rs b/core/parser/src/lexer/cursor.rs
index 97624b11e64..cd562068257 100644
--- a/core/parser/src/lexer/cursor.rs
+++ b/core/parser/src/lexer/cursor.rs
@@ -2,7 +2,7 @@
 
 use crate::source::{ReadChar, UTF8Input};
 use boa_ast::{LinearPosition, Position, PositionGroup, SourceText};
-use std::io::{self, Error, ErrorKind};
+use std::io::{self, Error};
 
 /// Cursor over the source code.
 #[derive(Debug)]
@@ -139,25 +139,6 @@ impl<R: ReadChar> Cursor<R> {
         })
     }
 
-    /// Fills the buffer with all bytes until the stop byte is found.
-    /// Returns error when reaching the end of the buffer.
-    ///
-    /// Note that all bytes up until the stop byte are added to the buffer, including the byte right before.
-    pub(super) fn take_until(&mut self, stop: u32, buf: &mut Vec<u32>) -> io::Result<()> {
-        loop {
-            if self.next_if(stop)? {
-                return Ok(());
-            } else if let Some(c) = self.next_char()? {
-                buf.push(c);
-            } else {
-                return Err(Error::new(
-                    ErrorKind::UnexpectedEof,
-                    format!("Unexpected end of file when looking for character {stop}"),
-                ));
-            }
-        }
-    }
-
     /// Fills a mutable slice up to the ends while characters are alphabetic. Returns
     /// the number of characters read, or `N+1` if the buffer was filled but there were
     /// still characters after.
diff --git a/core/parser/src/lexer/string.rs b/core/parser/src/lexer/string.rs
index 064b7dfcbaf..ec75df03664 100644
--- a/core/parser/src/lexer/string.rs
+++ b/core/parser/src/lexer/string.rs
@@ -250,69 +250,63 @@ impl StringLiteral {
     {
         // Support \u{X..X} (Unicode CodePoint)
         if cursor.next_if(0x7B /* { */)? {
-            // TODO: use bytes for a bit better performance (using stack)
-            let mut code_point_buf = Vec::with_capacity(6);
-            cursor.take_until(0x7D /* } */, &mut code_point_buf)?;
-
-            let mut s = String::with_capacity(code_point_buf.len());
-            for c in code_point_buf {
-                if let Some(c) = char::from_u32(c) {
-                    s.push(c);
-                } else {
+            let mut code_point = 0u32;
+            let mut first_digit = true;
+            loop {
+                let pos = cursor.pos();
+                let Some(c) = cursor.next_char()? else {
+                    return Err(Error::syntax(
+                        "Unexpected end of file when looking for character }",
+                        pos,
+                    ));
+                };
+                if c == 0x7D
+                /* } */
+                {
+                    if first_digit {
+                        return Err(Error::syntax(
+                            "malformed Unicode character escape sequence",
+                            start_pos,
+                        ));
+                    }
+                    break;
+                }
+
+                let Some(digit) = char::from_u32(c).and_then(|c| c.to_digit(16)) else {
                     return Err(Error::syntax(
                         "malformed Unicode character escape sequence",
                         start_pos,
                     ));
-                }
-            }
+                };
 
-            let Ok(code_point) = u32::from_str_radix(&s, 16) else {
-                return Err(Error::syntax(
-                    "malformed Unicode character escape sequence",
-                    start_pos,
-                ));
-            };
+                code_point = (code_point << 4) | digit;
 
-            // UTF16Encoding of a numeric code point value
-            if code_point > 0x10_FFFF {
-                return Err(Error::syntax(
-                    "Unicode codepoint must not be greater than 0x10FFFF in escape sequence",
-                    start_pos,
-                ));
+                if code_point > 0x10_FFFF {
+                    return Err(Error::syntax(
+                        "Unicode codepoint must not be greater than 0x10FFFF in escape sequence",
+                        start_pos,
+                    ));
+                }
+                first_digit = false;
             }
 
             Ok(code_point)
         } else {
             // Grammar: Hex4Digits
             // Collect each character after \u e.g \uD83D will give "D83D"
-            let mut buffer = [0u32; 4];
-            buffer[0] = cursor
-                .next_char()?
-                .ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
-            buffer[1] = cursor
-                .next_char()?
-                .ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
-            buffer[2] = cursor
-                .next_char()?
-                .ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
-            buffer[3] = cursor
-                .next_char()?
-                .ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
-
-            let mut s = String::with_capacity(buffer.len());
-            for c in buffer {
-                if let Some(c) = char::from_u32(c) {
-                    s.push(c);
-                } else {
+            let mut code_point = 0u32;
+            for _ in 0..4 {
+                let pos = cursor.pos();
+                let c = cursor
+                    .next_char()?
+                    .ok_or_else(|| Error::syntax("invalid Unicode escape sequence", pos))?;
+                let Some(digit) = char::from_u32(c).and_then(|c| c.to_digit(16)) else {
                     return Err(Error::syntax("invalid Unicode escape sequence", start_pos));
-                }
+                };
+                code_point = (code_point << 4) | digit;
             }
 
-            let Ok(code_point) = u16::from_str_radix(&s, 16) else {
-                return Err(Error::syntax("invalid Unicode escape sequence", start_pos));
-            };
-
-            Ok(u32::from(code_point))
+            Ok(code_point)
         }
     }
 
@@ -323,34 +317,22 @@ impl StringLiteral {
     where
         R: ReadChar,
     {
-        let mut buffer = [0u32; 2];
-        buffer[0] = cursor
-            .next_char()?
-            .ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
-        buffer[1] = cursor
-            .next_char()?
-            .ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
-
-        let mut s = String::with_capacity(buffer.len());
-        for c in buffer {
-            if let Some(c) = char::from_u32(c) {
-                s.push(c);
-            } else {
+        let mut code_point = 0u32;
+        for _ in 0..2 {
+            let pos = cursor.pos();
+            let c = cursor
+                .next_char()?
+                .ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", pos))?;
+            let Some(digit) = char::from_u32(c).and_then(|c| c.to_digit(16)) else {
                 return Err(Error::syntax(
                     "invalid Hexadecimal escape sequence",
                     start_pos,
                 ));
-            }
+            };
+            code_point = (code_point << 4) | digit;
         }
 
-        let Ok(code_point) = u16::from_str_radix(&s, 16) else {
-            return Err(Error::syntax(
-                "invalid Hexadecimal escape sequence",
-                start_pos,
-            ));
-        };
-
-        Ok(u32::from(code_point))
+        Ok(code_point)
     }
 
     fn take_legacy_octal_escape_sequence<R>(