Skip to content

Commit d2d29a8

Browse files
authored
fix TextDecoder label parsing (#5162)
Fixes #5135.

This makes `TextDecoder` normalize encoding labels before matching them. It now accepts supported labels that differ only in ASCII case, labels surrounded by ASCII whitespace, and the supported aliases of the UTF-8 and UTF-16 encodings that Boa currently implements.

Tests:
- `cargo test -p boa_runtime text --lib`
- `cargo test -p boa_runtime --lib`
- `cargo clippy -p boa_runtime --all-features --all-targets -- -D warnings`
1 parent c9aaad1 commit d2d29a8

File tree

2 files changed

+88
-11
lines changed

2 files changed

+88
-11
lines changed

core/runtime/src/text/mod.rs

Lines changed: 35 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,35 @@ pub enum Encoding {
3434
Utf16Be,
3535
}
3636

37+
const TEXT_DECODER_LABELS: &[(&str, Encoding)] = &[
38+
("unicode-1-1-utf-8", Encoding::Utf8),
39+
("unicode11utf8", Encoding::Utf8),
40+
("unicode20utf8", Encoding::Utf8),
41+
("utf-8", Encoding::Utf8),
42+
("utf8", Encoding::Utf8),
43+
("x-unicode20utf8", Encoding::Utf8),
44+
("unicodefffe", Encoding::Utf16Be),
45+
("utf-16be", Encoding::Utf16Be),
46+
("csunicode", Encoding::Utf16Le),
47+
("iso-10646-ucs-2", Encoding::Utf16Le),
48+
("ucs-2", Encoding::Utf16Le),
49+
("unicode", Encoding::Utf16Le),
50+
("unicodefeff", Encoding::Utf16Le),
51+
("utf-16", Encoding::Utf16Le),
52+
("utf-16le", Encoding::Utf16Le),
53+
];
54+
55+
#[inline]
56+
fn resolve_text_decoder_label(label: &str) -> Option<Encoding> {
57+
let label = label.trim_matches(['\u{0009}', '\u{000A}', '\u{000C}', '\u{000D}', '\u{0020}']);
58+
59+
TEXT_DECODER_LABELS
60+
.iter()
61+
.find_map(|(supported, encoding)| {
62+
label.eq_ignore_ascii_case(supported).then_some(*encoding)
63+
})
64+
}
65+
3766
/// The [`TextDecoder`][mdn] class represents a decoder for a specific method, that is
3867
/// a specific character encoding, like `utf-8`.
3968
///
@@ -62,17 +91,12 @@ impl TextDecoder {
6291
let ignore_bom = options.and_then(|o| o.ignore_bom).unwrap_or(false);
6392

6493
let encoding = match encoding {
65-
Some(enc) => match enc.to_std_string_lossy().as_str() {
66-
"utf-8" => Encoding::Utf8,
67-
// Default encoding is Little Endian.
68-
"utf-16" | "utf-16le" => Encoding::Utf16Le,
69-
"utf-16be" => Encoding::Utf16Be,
70-
e => {
71-
return Err(
72-
js_error!(RangeError: "The given encoding '{}' is not supported.", e),
73-
);
74-
}
75-
},
94+
Some(enc) => {
95+
let label = enc.to_std_string_lossy();
96+
resolve_text_decoder_label(&label).ok_or_else(
97+
|| js_error!(RangeError: "The given encoding '{}' is not supported.", label),
98+
)?
99+
}
76100
None => Encoding::default(),
77101
};
78102

core/runtime/src/text/tests.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,59 @@ fn decoder_bom_ignore_bom_false(encoding: &'static str, bytes: &'static [u8]) {
288288
);
289289
}
290290

291+
#[test_case("UTF-8", "utf-8"; "uppercase utf8")]
292+
#[test_case(" utf-8 ", "utf-8"; "spaced utf8")]
293+
#[test_case("\nutf-16\t", "utf-16le"; "spaced utf16")]
294+
#[test_case("UTF-16BE", "utf-16be"; "uppercase utf16be")]
295+
#[test_case("utf8", "utf-8"; "utf8 alias")]
296+
#[test_case("Unicode-1-1-UTF-8", "utf-8"; "unicode alias")]
297+
#[test_case("csUnicode", "utf-16le"; "csunicode alias")]
298+
#[test_case(" unicodefeff ", "utf-16le"; "unicodefeff alias")]
299+
#[test_case("UnicodeFFFE", "utf-16be"; "unicodefffe alias")]
300+
fn decoder_normalizes_supported_labels(label: &'static str, expected: &'static str) {
301+
let context = &mut Context::default();
302+
text::register(None, context).unwrap();
303+
304+
run_test_actions_with(
305+
[
306+
TestAction::run(format!(
307+
r#"
308+
const d = new TextDecoder({label:?});
309+
encoding = d.encoding;
310+
"#
311+
)),
312+
TestAction::inspect_context(move |context| {
313+
let encoding = context
314+
.global_object()
315+
.get(js_str!("encoding"), context)
316+
.unwrap();
317+
assert_eq!(encoding.as_string(), Some(JsString::from(expected)));
318+
}),
319+
],
320+
context,
321+
);
322+
}
323+
324+
#[test]
325+
fn decoder_rejects_unsupported_label_after_normalization() {
326+
let context = &mut Context::default();
327+
text::register(None, context).unwrap();
328+
329+
run_test_actions_with(
330+
[TestAction::run(indoc! {r#"
331+
try {
332+
new TextDecoder(" utf-32 ");
333+
throw new Error("expected RangeError");
334+
} catch (e) {
335+
if (!(e instanceof RangeError)) {
336+
throw e;
337+
}
338+
}
339+
"#})],
340+
context,
341+
);
342+
}
343+
291344
#[test]
292345
fn decoder_ignore_bom_getter() {
293346
let context = &mut Context::default();

0 commit comments

Comments
 (0)