swc-project · delino · Nov 3, 2025 · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025
@@ -5,7 +5,7 @@ edition     = { workspace = true }
 license     = { workspace = true }
 name        = "hstr"
 repository  = { workspace = true }
-version     = "3.0.1"
+version     = "3.0.3"
 
 [lib]
 bench = false

@@ -100,9 +100,43 @@ impl serde::ser::Serialize for Wtf8Atom {
                     // By escaping literal '\u' to '\\u', we ensure:
                     // - Unpaired surrogates serialize as '\uXXXX'
                     // - Literal '\u' text serializes as '\\uXXXX'
+                    //
+                    // However, we should only escape '\u' if the next 4 code points are hex
+                    // digits, which would indicate a Unicode escape sequence. Otherwise, '\u'
+                    // followed by non-hex characters (like '\util') should not be escaped.
                     if c == '\\' && iter.peek().map(|cp| cp.to_u32()) == Some('u' as u32) {
-                        iter.next(); // skip 'u'
-                        result.push_str("\\\\u");
+                        // Look ahead to see if this is followed by exactly 4 hex digits
+                        let mut lookahead = iter.clone();
+                        lookahead.next(); // skip 'u'
+
+                        let mut hex_count = 0;
+                        let mut all_hex = true;
+                        for _ in 0..4 {
+                            if let Some(next_cp) = lookahead.next() {
+                                if let Some(next_c) = next_cp.to_char() {
+                                    if next_c.is_ascii_hexdigit() {
+                                        hex_count += 1;
+                                    } else {
+                                        all_hex = false;
+                                        break;
+                                    }
+                                } else {
+                                    all_hex = false;
+                                    break;
+                                }
+                            } else {
+                                all_hex = false;
+                                break;
+                            }
+                        }
+
+                        // Only escape if we have exactly 4 hex digits after '\u'
+                        if hex_count == 4 && all_hex {
+                            iter.next(); // skip 'u'
+                            result.push_str("\\\\u");
+                        } else {
+                            result.push(c);
+                        }
                     } else {
                         result.push(c)
                     }
@@ -553,4 +587,32 @@ mod tests {
         let err_atom = result.unwrap_err();
         assert_eq!(err_atom.to_string_lossy(), "\u{FFFD}");
     }
+
+    #[test]
+    fn test_backslash_util_issue_11214() {
+        let atom =
+            Wtf8Atom::from("C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts");
+        let serialized = serde_json::to_string(&atom).unwrap();
+        // Regression (issue 11214): `\util` is `\u` + non-hex text, so no extra escaping.
+        assert!(
+            !serialized.contains("spec\\\\\\\\util"),
+            "Found quadruple backslashes in spec segment! Serialized: {serialized}"
+        );
+
+        assert!(
+            serialized.contains("spec\\\\util"),
+            "Expected double backslashes in spec segment not found! Serialized: {serialized}",
+        );
+
+        // Whole-string check: each backslash of the path appears doubled exactly once
+        let expected = r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#;
+        assert_eq!(
+            serialized, expected,
+            "Serialized value should have consistent backslash escaping"
+        );
+
+        // Round-trip: deserializing the JSON must give back an atom equal to the input
+        let deserialized: Wtf8Atom = serde_json::from_str(&serialized).unwrap();
+        assert_eq!(atom, deserialized);
+    }
 }
@@ -1462,6 +1462,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
 
         let c = match c {
             '\\' => '\\',
+            '\'' => '\'',
+            '"' => '"',
             'n' => '\n',
             'r' => '\r',
             't' => '\t',
@@ -1557,7 +1559,26 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
 
                 return Ok(CodePoint::from_u32(value as u32));
             }
-            _ => c,
+            // For unrecognized escape sequences, return the backslash and don't consume
+            // the following character, so both the backslash and the character end up
+            // in the string value.
+            // NOTE(review): ECMAScript defines the value of a NonEscapeCharacter escape
+            // as the character alone ("\s" === "s") and does not make such escapes
+            // errors in strict mode or in templates; confirm this arm against the spec
+            // before relying on the errors raised below.
+            _ => {
+                // In template literals, unrecognized escape sequences are always errors
+                if in_template {
+                    self.error(start, SyntaxError::InvalidStrEscape)?
+                }
+
+                // In strict mode, unrecognized escape sequences are syntax errors
+                self.emit_strict_mode_error(start, SyntaxError::InvalidStrEscape);
+
+                // Don't bump - let the following character be read normally in the next
+                // iteration
+                return Ok(Some(CodePoint::from_char('\\')));
+            }
         };
 
         unsafe {

@@ -2241,3 +2241,51 @@ fn issue_9106() {
         ]
     );
 }
+
+#[test]
+fn issue_11214_windows_path_escape() {
+    // Windows path written with escaped backslashes: the source text contains `\\`
+    // pairs, so the expected cooked value holds single backslashes (the non-raw
+    // `atom!` literal below) while `raw` must equal the source text unchanged.
+    assert_eq!(
+        lex_tokens(
+            Syntax::default(),
+            r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#
+        ),
+        vec![Token::Str {
+            value: atom!("C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts")
+                .into(),
+            raw: atom!(r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#),
+        }]
+    );
+}
+
+#[test]
+fn issue_11214_unrecognized_escape_sequences() {
+    // `\s`, `\g` and `\a` are not recognized escapes; these cases expect the cooked
+    // value to keep the backslash plus the character. NOTE(review): ECMAScript's SV
+    // for a NonEscapeCharacter is the character alone ("\s" === "s") - confirm intent.
+    assert_eq!(
+        lex_tokens(Syntax::default(), r#""\s""#),
+        vec![Token::Str {
+            value: atom!(r"\s").into(),
+            raw: atom!(r#""\s""#),
+        }]
+    );
+
+    assert_eq!(
+        lex_tokens(Syntax::default(), r#""\g""#),
+        vec![Token::Str {
+            value: atom!(r"\g").into(),
+            raw: atom!(r#""\g""#),
+        }]
+    );
+
+    assert_eq!(
+        lex_tokens(Syntax::default(), r#""\a""#),
+        vec![Token::Str {
+            value: atom!(r"\a").into(),
+            raw: atom!(r#""\a""#),
+        }]
+    );
+}