Skip to content

Commit 2076ba3

Browse files
kwonojkdy1
authored and committed
fix(hstr): Skip only \u for unicode (#11216)
**Description:** Attempt to fix #11214. The reason for the unescaped `\\` was the special handling of the unicode `\u` codepoint escape, which escapes any string literal sequence that starts with `\u` - on Windows, a path separator followed by u (`\\u`) matches this case. This PR attempts to solve it with a lookahead, confirming that all 4 following characters are hex digits representing a unicode hex value; otherwise the sequence is treated as a plain string. To be honest, I'm not sure whether this is an acceptable approach or not; feel free to close if there's a better way to fix it. **Related issue:** - Closes #11214
1 parent c90e71f commit 2076ba3

File tree

4 files changed

+143
-2
lines changed

4 files changed

+143
-2
lines changed

.changeset/rotten-frogs-drive.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
swc_core: patch
3+
hstr: patch
4+
---
5+
6+
fix(atom): skip only unicode \u

crates/hstr/src/wtf8_atom.rs

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,43 @@ impl serde::ser::Serialize for Wtf8Atom {
9999
// By escaping literal '\u' to '\\u', we ensure:
100100
// - Unpaired surrogates serialize as '\uXXXX'
101101
// - Literal '\u' text serializes as '\\uXXXX'
102+
//
103+
// However, we should only escape '\u' if it's followed by exactly 4 hex digits,
104+
// which would indicate a Unicode escape sequence. Otherwise, '\u' followed by
105+
// non-hex characters (like '\util') should not be escaped.
102106
if c == '\\' && iter.peek().map(|cp| cp.to_u32()) == Some('u' as u32) {
103-
iter.next(); // skip 'u'
104-
result.push_str("\\\\u");
107+
// Look ahead to see if this is followed by exactly 4 hex digits
108+
let mut lookahead = iter.clone();
109+
lookahead.next(); // skip 'u'
110+
111+
let mut hex_count = 0;
112+
let mut all_hex = true;
113+
for _ in 0..4 {
114+
if let Some(next_cp) = lookahead.next() {
115+
if let Some(next_c) = next_cp.to_char() {
116+
if next_c.is_ascii_hexdigit() {
117+
hex_count += 1;
118+
} else {
119+
all_hex = false;
120+
break;
121+
}
122+
} else {
123+
all_hex = false;
124+
break;
125+
}
126+
} else {
127+
all_hex = false;
128+
break;
129+
}
130+
}
131+
132+
// Only escape if we have exactly 4 hex digits after '\u'
133+
if hex_count == 4 && all_hex {
134+
iter.next(); // skip 'u'
135+
result.push_str("\\\\u");
136+
} else {
137+
result.push(c);
138+
}
105139
} else {
106140
result.push(c)
107141
}
@@ -551,4 +585,32 @@ mod tests {
551585
let err_atom = result.unwrap_err();
552586
assert_eq!(err_atom.to_string_lossy(), "\u{FFFD}");
553587
}
588+
589+
/// Regression test for issue #11214: a Windows-style path whose segment
/// starts with `\u` (here `\util`) must serialize to JSON with every
/// backslash escaped exactly once, and must survive a serialize/deserialize
/// round-trip unchanged.
#[test]
590+
fn test_backslash_util_issue_11214() {
591+
// The Rust literal below contains single backslashes as path separators.
let atom =
592+
Wtf8Atom::from("C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts");
593+
let serialized = serde_json::to_string(&atom).unwrap();
594+
595+
// Guard against the over-escaped form: four backslashes before `util`
// in the serialized JSON text.
assert!(
596+
!serialized.contains("spec\\\\\\\\util"),
597+
"Found quadruple backslashes in spec segment! Serialized: {serialized}"
598+
);
599+
600+
// The serialized JSON text should contain exactly the two-backslash form
// (one JSON-escaped backslash) before `util`.
assert!(
601+
serialized.contains("spec\\\\util"),
602+
"Expected double backslashes in spec segment not found! Serialized: {serialized}",
603+
);
604+
605+
// The expected serialized value should have consistent escaping
// (raw string: every separator appears as `\\`, wrapped in JSON quotes).
606+
let expected = r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#;
607+
assert_eq!(
608+
serialized, expected,
609+
"Serialized value should have consistent backslash escaping"
610+
);
611+
612+
// Test round-trip: deserializing the JSON must yield an atom equal to the input.
613+
let deserialized: Wtf8Atom = serde_json::from_str(&serialized).unwrap();
614+
assert_eq!(atom, deserialized);
615+
}
554616
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
var coverageData = {
2+
path: "C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts",
3+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
{
2+
"type": "Script",
3+
"span": {
4+
"start": 1,
5+
"end": 107
6+
},
7+
"body": [
8+
{
9+
"type": "VariableDeclaration",
10+
"span": {
11+
"start": 1,
12+
"end": 107
13+
},
14+
"ctxt": 0,
15+
"kind": "var",
16+
"declare": false,
17+
"declarations": [
18+
{
19+
"type": "VariableDeclarator",
20+
"span": {
21+
"start": 5,
22+
"end": 107
23+
},
24+
"id": {
25+
"type": "Identifier",
26+
"span": {
27+
"start": 5,
28+
"end": 17
29+
},
30+
"ctxt": 0,
31+
"value": "coverageData",
32+
"optional": false,
33+
"typeAnnotation": null
34+
},
35+
"init": {
36+
"type": "ObjectExpression",
37+
"span": {
38+
"start": 20,
39+
"end": 107
40+
},
41+
"properties": [
42+
{
43+
"type": "KeyValueProperty",
44+
"key": {
45+
"type": "Identifier",
46+
"span": {
47+
"start": 26,
48+
"end": 30
49+
},
50+
"value": "path"
51+
},
52+
"value": {
53+
"type": "StringLiteral",
54+
"span": {
55+
"start": 32,
56+
"end": 101
57+
},
58+
"value": "C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts",
59+
"raw": "\"C:\\\\github\\\\swc-plugin-coverage-instrument\\\\spec\\\\util\\\\verifier.ts\""
60+
}
61+
}
62+
]
63+
},
64+
"definite": false
65+
}
66+
]
67+
}
68+
],
69+
"interpreter": null
70+
}

0 commit comments

Comments
 (0)