Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/hstr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ edition = { workspace = true }
license = { workspace = true }
name = "hstr"
repository = { workspace = true }
version = "3.0.1"
version = "3.0.3"

[lib]
bench = false
Expand Down
66 changes: 64 additions & 2 deletions crates/hstr/src/wtf8_atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,43 @@ impl serde::ser::Serialize for Wtf8Atom {
// By escaping literal '\u' to '\\u', we ensure:
// - Unpaired surrogates serialize as '\uXXXX'
// - Literal '\u' text serializes as '\\uXXXX'
//
// However, we should only escape '\u' if it's followed by exactly 4 hex digits,
// which would indicate a Unicode escape sequence. Otherwise, '\u' followed by
// non-hex characters (like '\util') should not be escaped.
if c == '\\' && iter.peek().map(|cp| cp.to_u32()) == Some('u' as u32) {
iter.next(); // skip 'u'
result.push_str("\\\\u");
// Look ahead to see if this is followed by exactly 4 hex digits
let mut lookahead = iter.clone();
lookahead.next(); // skip 'u'

let mut hex_count = 0;
let mut all_hex = true;
for _ in 0..4 {
if let Some(next_cp) = lookahead.next() {
if let Some(next_c) = next_cp.to_char() {
if next_c.is_ascii_hexdigit() {
hex_count += 1;
} else {
all_hex = false;
break;
}
} else {
all_hex = false;
break;
}
} else {
all_hex = false;
break;
}
}

// Only escape if we have exactly 4 hex digits after '\u'
if hex_count == 4 && all_hex {
iter.next(); // skip 'u'
result.push_str("\\\\u");
} else {
result.push(c);
}
} else {
result.push(c)
}
Expand Down Expand Up @@ -553,4 +587,32 @@ mod tests {
let err_atom = result.unwrap_err();
assert_eq!(err_atom.to_string_lossy(), "\u{FFFD}");
}

/// Regression test for issue 11214: a Windows path containing `\util` must
/// serialize with ordinary JSON backslash escaping and survive a round trip.
#[test]
fn test_backslash_util_issue_11214() {
    // `\util` starts with `\u` but is not followed by four hex digits.
    let windows_path = "C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts";
    let atom = Wtf8Atom::from(windows_path);
    let serialized = serde_json::to_string(&atom).unwrap();

    // The `\u` in `\util` must not receive an extra layer of escaping.
    let over_escaped = serialized.contains("spec\\\\\\\\util");
    assert!(
        !over_escaped,
        "Found quadruple backslashes in spec segment! Serialized: {serialized}"
    );

    let escaped_once = serialized.contains("spec\\\\util");
    assert!(
        escaped_once,
        "Expected double backslashes in spec segment not found! Serialized: {serialized}",
    );

    // Every backslash in the path is doubled exactly once in the JSON text.
    let expected = r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#;
    assert_eq!(
        serialized, expected,
        "Serialized value should have consistent backslash escaping"
    );

    // Deserializing the JSON text must yield the original atom.
    let round_tripped = serde_json::from_str::<Wtf8Atom>(&serialized).unwrap();
    assert_eq!(atom, round_tripped);
}
}
12 changes: 11 additions & 1 deletion crates/swc_ecma_lexer/src/common/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1462,6 +1462,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {

let c = match c {
'\\' => '\\',
'\'' => '\'',
'"' => '"',
'n' => '\n',
'r' => '\r',
't' => '\t',
Expand Down Expand Up @@ -1557,7 +1559,15 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {

return Ok(CodePoint::from_u32(value as u32));
}
_ => c,
// For unrecognized escape sequences, return the backslash and don't consume
// the following character. According to ECMAScript, when a backslash precedes
// a character that doesn't form a valid escape sequence, both the backslash
// and the character should be preserved in the string value.
_ => {
// Don't bump - let the following character be read normally in the next
// iteration
Copy link

Copilot AI Nov 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default case for unrecognized escape sequences should emit a strict mode error and handle template literals correctly. In strict mode, unrecognized escape sequences are syntax errors. In template literals (pre-ES2018), they should always be errors. The implementation should follow the pattern used for octal escapes (lines 1524-1528). Consider adding:

```rust
_ => {
    if in_template {
        self.error(start, SyntaxError::InvalidStrEscape)?
    }

    self.emit_strict_mode_error(start, SyntaxError::InvalidStrEscape);

    return Ok(Some(CodePoint::from_char('\\')));
}
```

Suggested change
// iteration
// iteration
if in_template {
self.error(start, SyntaxError::InvalidStrEscape)?;
}
self.emit_strict_mode_error(start, SyntaxError::InvalidStrEscape);

Copilot uses AI. Check for mistakes.
return Ok(Some(CodePoint::from_char('\\')));
}
};

unsafe {
Expand Down
48 changes: 48 additions & 0 deletions crates/swc_ecma_lexer/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2241,3 +2241,51 @@ fn issue_9106() {
]
);
}

#[test]
fn issue_11214_windows_path_escape() {
    // Windows file path written as a string literal in which every path
    // separator is spelled `\\`. The expected cooked value therefore carries a
    // single backslash per separator, while `raw` keeps the source text
    // (quotes and doubled backslashes) unchanged.
    let source = r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#;
    let tokens = lex_tokens(Syntax::default(), source);

    let expected = vec![Token::Str {
        value: atom!("C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts")
            .into(),
        raw: atom!(r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#),
    }];

    assert_eq!(tokens, expected);
}

#[test]
fn issue_11214_unrecognized_escape_sequences() {
// Test various unrecognized escape sequences
// According to ECMAScript, \s, \g, \a etc. (when not part of a valid escape)
// preserve the backslash: the value should be backslash + character
assert_eq!(
lex_tokens(Syntax::default(), r#""\s""#),
vec![Token::Str {
value: atom!(r"\s").into(),
raw: atom!(r#""\s""#),
}]
);

assert_eq!(
lex_tokens(Syntax::default(), r#""\g""#),
vec![Token::Str {
value: atom!(r"\g").into(),
raw: atom!(r#""\g""#),
}]
);

assert_eq!(
lex_tokens(Syntax::default(), r#""\a""#),
vec![Token::Str {
value: atom!(r"\a").into(),
raw: atom!(r#""\a""#),
}]
);
Comment on lines +2268 to +2290
Copy link

Copilot AI Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests expect "\s" to produce a value of "\s" (two characters: backslash + s), but according to the ECMAScript specification, "\s" should produce just "s" (one character).

In JavaScript:

console.log("\s");        // outputs: s
console.log("\s".length); // outputs: 1

If the lexer changes are reverted to match ECMAScript behavior, these test expectations would need to be updated accordingly.

Copilot uses AI. Check for mistakes.
}