Skip to content

Commit 0933a1e

Browse files
committed
Refactor: Optimize hex escape parsing in lexer
1 parent 40f9990 commit 0933a1e

File tree

2 files changed

+53
-90
lines changed

2 files changed

+53
-90
lines changed

core/parser/src/lexer/cursor.rs

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
use crate::source::{ReadChar, UTF8Input};
44
use boa_ast::{LinearPosition, Position, PositionGroup, SourceText};
5-
use std::io::{self, Error, ErrorKind};
5+
use std::io::{self, Error};
66

77
/// Cursor over the source code.
88
#[derive(Debug)]
@@ -139,25 +139,6 @@ impl<R: ReadChar> Cursor<R> {
139139
})
140140
}
141141

142-
/// Fills the buffer with all bytes until the stop byte is found.
143-
/// Returns error when reaching the end of the buffer.
144-
///
145-
/// Note that all bytes up until the stop byte are added to the buffer, including the byte right before.
146-
pub(super) fn take_until(&mut self, stop: u32, buf: &mut Vec<u32>) -> io::Result<()> {
147-
loop {
148-
if self.next_if(stop)? {
149-
return Ok(());
150-
} else if let Some(c) = self.next_char()? {
151-
buf.push(c);
152-
} else {
153-
return Err(Error::new(
154-
ErrorKind::UnexpectedEof,
155-
format!("Unexpected end of file when looking for character {stop}"),
156-
));
157-
}
158-
}
159-
}
160-
161142
/// Fills a mutable slice up to its end while characters are alphabetic. Returns
162143
/// the number of characters read, or `N+1` if the buffer was filled but there were
163144
/// still characters after.

core/parser/src/lexer/string.rs

Lines changed: 52 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -250,69 +250,63 @@ impl StringLiteral {
250250
{
251251
// Support \u{X..X} (Unicode CodePoint)
252252
if cursor.next_if(0x7B /* { */)? {
253-
// TODO: use bytes for a bit better performance (using stack)
254-
let mut code_point_buf = Vec::with_capacity(6);
255-
cursor.take_until(0x7D /* } */, &mut code_point_buf)?;
256-
257-
let mut s = String::with_capacity(code_point_buf.len());
258-
for c in code_point_buf {
259-
if let Some(c) = char::from_u32(c) {
260-
s.push(c);
261-
} else {
253+
let mut code_point = 0u32;
254+
let mut first_digit = true;
255+
loop {
256+
let pos = cursor.pos();
257+
let Some(c) = cursor.next_char()? else {
258+
return Err(Error::syntax(
259+
"Unexpected end of file when looking for character }",
260+
pos,
261+
));
262+
};
263+
if c == 0x7D
264+
/* } */
265+
{
266+
if first_digit {
267+
return Err(Error::syntax(
268+
"malformed Unicode character escape sequence",
269+
start_pos,
270+
));
271+
}
272+
break;
273+
}
274+
275+
let Some(digit) = char::from_u32(c).and_then(|c| c.to_digit(16)) else {
262276
return Err(Error::syntax(
263277
"malformed Unicode character escape sequence",
264278
start_pos,
265279
));
266-
}
267-
}
280+
};
268281

269-
let Ok(code_point) = u32::from_str_radix(&s, 16) else {
270-
return Err(Error::syntax(
271-
"malformed Unicode character escape sequence",
272-
start_pos,
273-
));
274-
};
282+
code_point = (code_point << 4) | digit;
275283

276-
// UTF16Encoding of a numeric code point value
277-
if code_point > 0x10_FFFF {
278-
return Err(Error::syntax(
279-
"Unicode codepoint must not be greater than 0x10FFFF in escape sequence",
280-
start_pos,
281-
));
284+
if code_point > 0x10_FFFF {
285+
return Err(Error::syntax(
286+
"Unicode codepoint must not be greater than 0x10FFFF in escape sequence",
287+
start_pos,
288+
));
289+
}
290+
first_digit = false;
282291
}
283292

284293
Ok(code_point)
285294
} else {
286295
// Grammar: Hex4Digits
287296
// Accumulate the four hex digits after \u, e.g. \uD83D will give 0xD83D
288-
let mut buffer = [0u32; 4];
289-
buffer[0] = cursor
290-
.next_char()?
291-
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
292-
buffer[1] = cursor
293-
.next_char()?
294-
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
295-
buffer[2] = cursor
296-
.next_char()?
297-
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
298-
buffer[3] = cursor
299-
.next_char()?
300-
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
301-
302-
let mut s = String::with_capacity(buffer.len());
303-
for c in buffer {
304-
if let Some(c) = char::from_u32(c) {
305-
s.push(c);
306-
} else {
297+
let mut code_point = 0u32;
298+
for _ in 0..4 {
299+
let pos = cursor.pos();
300+
let c = cursor
301+
.next_char()?
302+
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", pos))?;
303+
let Some(digit) = char::from_u32(c).and_then(|c| c.to_digit(16)) else {
307304
return Err(Error::syntax("invalid Unicode escape sequence", start_pos));
308-
}
305+
};
306+
code_point = (code_point << 4) | digit;
309307
}
310308

311-
let Ok(code_point) = u16::from_str_radix(&s, 16) else {
312-
return Err(Error::syntax("invalid Unicode escape sequence", start_pos));
313-
};
314-
315-
Ok(u32::from(code_point))
309+
Ok(code_point)
316310
}
317311
}
318312

@@ -323,34 +317,22 @@ impl StringLiteral {
323317
where
324318
R: ReadChar,
325319
{
326-
let mut buffer = [0u32; 2];
327-
buffer[0] = cursor
328-
.next_char()?
329-
.ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
330-
buffer[1] = cursor
331-
.next_char()?
332-
.ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
333-
334-
let mut s = String::with_capacity(buffer.len());
335-
for c in buffer {
336-
if let Some(c) = char::from_u32(c) {
337-
s.push(c);
338-
} else {
320+
let mut code_point = 0u32;
321+
for _ in 0..2 {
322+
let pos = cursor.pos();
323+
let c = cursor
324+
.next_char()?
325+
.ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", pos))?;
326+
let Some(digit) = char::from_u32(c).and_then(|c| c.to_digit(16)) else {
339327
return Err(Error::syntax(
340328
"invalid Hexadecimal escape sequence",
341329
start_pos,
342330
));
343-
}
331+
};
332+
code_point = (code_point << 4) | digit;
344333
}
345334

346-
let Ok(code_point) = u16::from_str_radix(&s, 16) else {
347-
return Err(Error::syntax(
348-
"invalid Hexadecimal escape sequence",
349-
start_pos,
350-
));
351-
};
352-
353-
Ok(u32::from(code_point))
335+
Ok(code_point)
354336
}
355337

356338
fn take_legacy_octal_escape_sequence<R>(

0 commit comments

Comments
 (0)