Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/hstr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ edition = { workspace = true }
license = { workspace = true }
name = "hstr"
repository = { workspace = true }
version = "3.0.1"
version = "3.0.3"

[lib]
bench = false
Expand Down
66 changes: 64 additions & 2 deletions crates/hstr/src/wtf8_atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,43 @@ impl serde::ser::Serialize for Wtf8Atom {
// By escaping literal '\u' to '\\u', we ensure:
// - Unpaired surrogates serialize as '\uXXXX'
// - Literal '\u' text serializes as '\\uXXXX'
//
// However, we should only escape '\u' if it's followed by exactly 4 hex digits,
// which would indicate a Unicode escape sequence. Otherwise, '\u' followed by
// non-hex characters (like '\util') should not be escaped.
if c == '\\' && iter.peek().map(|cp| cp.to_u32()) == Some('u' as u32) {
iter.next(); // skip 'u'
result.push_str("\\\\u");
// Look ahead to see if this is followed by exactly 4 hex digits
let mut lookahead = iter.clone();
lookahead.next(); // skip 'u'

let mut hex_count = 0;
let mut all_hex = true;
for _ in 0..4 {
if let Some(next_cp) = lookahead.next() {
if let Some(next_c) = next_cp.to_char() {
if next_c.is_ascii_hexdigit() {
hex_count += 1;
} else {
all_hex = false;
break;
}
} else {
all_hex = false;
break;
}
} else {
all_hex = false;
break;
}
}

// Only escape if we have exactly 4 hex digits after '\u'
if hex_count == 4 && all_hex {
iter.next(); // skip 'u'
result.push_str("\\\\u");
} else {
result.push(c);
}
} else {
result.push(c)
}
Expand Down Expand Up @@ -553,4 +587,32 @@ mod tests {
let err_atom = result.unwrap_err();
assert_eq!(err_atom.to_string_lossy(), "\u{FFFD}");
}

/// Regression test for issue 11214: a Windows path containing a `\util`
/// segment (a backslash followed by `u` but not by four hex digits) must be
/// serialized with each backslash escaped exactly once, and must survive a
/// JSON round-trip unchanged.
#[test]
fn test_backslash_util_issue_11214() {
    // In-memory value: single backslashes between the path segments.
    let path = "C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts";
    // JSON text we expect: the same path with every backslash doubled, in quotes.
    let expected = r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#;

    let original = Wtf8Atom::from(path);
    let serialized = serde_json::to_string(&original).unwrap();

    // Four consecutive backslashes before `util` would mean the `\u` prefix was
    // escaped a second time on top of the regular JSON escaping.
    let double_escaped = serialized.contains("spec\\\\\\\\util");
    assert!(
        !double_escaped,
        "Found quadruple backslashes in spec segment! Serialized: {serialized}"
    );

    // Exactly two backslashes is the regular JSON encoding of one backslash.
    let singly_escaped = serialized.contains("spec\\\\util");
    assert!(
        singly_escaped,
        "Expected double backslashes in spec segment not found! Serialized: {serialized}",
    );

    // The whole payload, not just the `spec\util` segment, must match.
    assert_eq!(
        serialized, expected,
        "Serialized value should have consistent backslash escaping"
    );

    // Deserializing the JSON text must yield an atom equal to the input.
    let round_tripped: Wtf8Atom = serde_json::from_str(&serialized).unwrap();
    assert_eq!(original, round_tripped);
}
}
23 changes: 22 additions & 1 deletion crates/swc_ecma_lexer/src/common/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1462,6 +1462,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {

let c = match c {
'\\' => '\\',
'\'' => '\'',
'"' => '"',
'n' => '\n',
'r' => '\r',
't' => '\t',
Expand Down Expand Up @@ -1557,7 +1559,26 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {

return Ok(CodePoint::from_u32(value as u32));
}
_ => c,
// For unrecognized escape sequences, return the backslash and don't consume
// the following character. According to ECMAScript, when a backslash precedes
// a character that doesn't form a valid escape sequence, both the backslash
// and the character should be preserved in the string value.
//
// However, in strict mode, unrecognized escape sequences are syntax errors.
// In template literals, they should always be errors (pre-ES2018 behavior).
_ => {
// In template literals, unrecognized escape sequences are always errors
Comment on lines +1562 to +1570
Copy link

Copilot AI Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new behavior for unrecognized escape sequences may not align with the ECMAScript specification. According to ES spec section 12.9.4, when a backslash precedes a character that forms a NonEscapeCharacter (like \s, \g, \a), the escape sequence should produce just the character itself, not the backslash + character.

For example, "\s" should have a string value of "s" (length 1), not "\s" (length 2).

This can be verified in Node.js:

console.log("\s");        // outputs: s
console.log("\s".length); // outputs: 1

The current implementation returns the backslash and preserves the following character, which would make "\s" have a value of "\s" (length 2).

Could you clarify if this behavior is intentional for a specific mode (e.g., TypeScript compatibility, source preservation), or if there's a misunderstanding of the ECMAScript specification?

Copilot uses AI. Check for mistakes.
if in_template {
self.error(start, SyntaxError::InvalidStrEscape)?
}

// In strict mode, unrecognized escape sequences are syntax errors
self.emit_strict_mode_error(start, SyntaxError::InvalidStrEscape);

// Don't bump - let the following character be read normally in the next
// iteration
Copy link

Copilot AI Nov 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default case for unrecognized escape sequences should emit a strict mode error and handle template literals correctly. In strict mode, unrecognized escape sequences are syntax errors. In template literals (pre-ES2018), they should always be errors. The implementation should follow the pattern used for octal escapes (lines 1524-1528). Consider adding:

```rust
_ => {
    if in_template {
        self.error(start, SyntaxError::InvalidStrEscape)?
    }

    self.emit_strict_mode_error(start, SyntaxError::InvalidStrEscape);

    return Ok(Some(CodePoint::from_char('\\')));
}
```

Suggested change
// iteration
// iteration
if in_template {
self.error(start, SyntaxError::InvalidStrEscape)?;
}
self.emit_strict_mode_error(start, SyntaxError::InvalidStrEscape);

Copilot uses AI. Check for mistakes.
return Ok(Some(CodePoint::from_char('\\')));
}
};

unsafe {
Expand Down
48 changes: 48 additions & 0 deletions crates/swc_ecma_lexer/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2241,3 +2241,51 @@ fn issue_9106() {
]
);
}

/// Regression test for issue 11214: lexes a double-quoted string literal that
/// holds a Windows file path whose separators are written as escaped
/// backslashes (`\\`) in the source text.
///
/// The expected token carries the path with single backslashes as its value
/// and the untouched source text (quotes and escapes included) as its raw form.
#[test]
fn issue_11214_windows_path_escape() {
    // Source text exactly as it would appear in the input file.
    let source = r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#;

    let tokens = lex_tokens(Syntax::default(), source);

    let expected = vec![Token::Str {
        // Cooked value: each `\\` pair in the source becomes one backslash.
        value: atom!("C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts")
            .into(),
        // Raw value: the source text verbatim.
        raw: atom!(r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#),
    }];

    assert_eq!(tokens, expected);
}

#[test]
fn issue_11214_unrecognized_escape_sequences() {
// Test various unrecognized escape sequences
// According to ECMAScript, \s, \g, \a etc. (when not part of a valid escape)
// preserve the backslash: the value should be backslash + character
assert_eq!(
lex_tokens(Syntax::default(), r#""\s""#),
vec![Token::Str {
value: atom!(r"\s").into(),
raw: atom!(r#""\s""#),
}]
);

assert_eq!(
lex_tokens(Syntax::default(), r#""\g""#),
vec![Token::Str {
value: atom!(r"\g").into(),
raw: atom!(r#""\g""#),
}]
);

assert_eq!(
lex_tokens(Syntax::default(), r#""\a""#),
vec![Token::Str {
value: atom!(r"\a").into(),
raw: atom!(r#""\a""#),
}]
);
Comment on lines +2268 to +2290
Copy link

Copilot AI Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests expect "\s" to produce a value of "\s" (two characters: backslash + s), but according to the ECMAScript specification, "\s" should produce just "s" (one character).

In JavaScript:

console.log("\s");        // outputs: s
console.log("\s".length); // outputs: 1

If the lexer changes are reverted to match ECMAScript behavior, these test expectations would need to be updated accordingly.

Copilot uses AI. Check for mistakes.
}
Loading