Skip to content

Commit 604b897

Browse files
committed
JS TextDecoder: safe and decodeBOM options
Signed-off-by: Didier Wenzek <[email protected]>
1 parent 44c8680 commit 604b897

File tree

2 files changed

+75
-8
lines changed

2 files changed

+75
-8
lines changed
Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
use rquickjs::class::Trace;
2+
use rquickjs::prelude::Opt;
23
use rquickjs::Class;
34
use rquickjs::Ctx;
45
use rquickjs::Exception;
56
use rquickjs::JsLifetime;
7+
use rquickjs::Object;
68
use rquickjs::Result;
79
use rquickjs::TypedArray;
10+
use std::borrow::Cow;
811

9-
#[derive(Clone, Trace, JsLifetime)]
12+
#[derive(Clone, Default, Trace, JsLifetime)]
1013
#[rquickjs::class(frozen)]
11-
struct TextDecoder {}
14+
struct TextDecoder {
15+
fatal: bool,
16+
ignore_bom: bool,
17+
}
1218

1319
pub fn init(ctx: &Ctx<'_>) {
1420
let globals = ctx.globals();
@@ -18,22 +24,59 @@ pub fn init(ctx: &Ctx<'_>) {
1824
#[rquickjs::methods]
1925
impl<'js> TextDecoder {
2026
#[qjs(constructor)]
21-
fn new() -> TextDecoder {
22-
TextDecoder {}
27+
fn new(ctx: Ctx<'js>, label: Opt<String>, options: Opt<Object<'js>>) -> Result<TextDecoder> {
28+
if let Some(label) = label.into_inner() {
29+
if label != "utf-8" && label != "utf8" {
30+
return Err(Exception::throw_message(
31+
&ctx,
32+
"TextDecoder only supports utf-8",
33+
));
34+
}
35+
}
36+
let decoder = options.into_inner().map(|options| {
37+
let fatal = options.get("fatal").ok().flatten().unwrap_or(false);
38+
let ignore_bom = options.get("ignoreBOM").ok().flatten().unwrap_or(false);
39+
TextDecoder { fatal, ignore_bom }
40+
});
41+
42+
Ok(decoder.unwrap_or_default())
2343
}
2444

2545
#[qjs(get)]
2646
fn encoding(&self) -> &str {
2747
"utf-8"
2848
}
2949

50+
#[qjs(get)]
51+
fn fatal(&self) -> bool {
52+
self.fatal
53+
}
54+
55+
#[qjs(get, rename = "ignoreBOM")]
56+
fn ignore_bom(&self) -> bool {
57+
self.ignore_bom
58+
}
59+
3060
pub fn decode(&self, ctx: Ctx<'js>, bytes: TypedArray<'js, u8>) -> Result<String> {
31-
let bytes = bytes
61+
let mut bytes = bytes
3262
.as_bytes()
3363
.ok_or(Exception::throw_message(&ctx, "ArrayBuffer is detached"))?;
34-
let text = std::str::from_utf8(bytes)
35-
.map_err(|err| Exception::throw_message(&ctx, &err.to_string()))?;
3664

37-
Ok(text.to_owned())
65+
if !self.ignore_bom && bytes.get(..3) == Some(&[0xEF, 0xBB, 0xBF]) {
66+
bytes = &bytes[3..];
67+
}
68+
69+
let text = if self.fatal {
70+
std::str::from_utf8(bytes)
71+
.map_err(|err| Exception::throw_message(&ctx, &err.to_string()))?
72+
.to_string()
73+
} else {
74+
match String::from_utf8_lossy(bytes) {
75+
Cow::Owned(text) => text,
76+
Cow::Borrowed(text) => text.to_owned(),
77+
}
78+
};
79+
80+
Ok(text)
3881
}
3982
}

crates/extensions/tedge_flows/src/js_script.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,30 @@ export async function onMessage(message, config) {
396396
);
397397
}
398398

399+
#[tokio::test]
400+
async fn decode_utf8_with_bom_and_invalid_chars() {
401+
let js = r#"
402+
export async function onMessage(message, config) {
403+
const utf8decoder = new TextDecoder();
404+
const encodedText = message.raw_payload;
405+
const decodedText = utf8decoder.decode(encodedText);
406+
return [{topic:"decoded", payload: decodedText}];
407+
}
408+
"#;
409+
let (runtime, script) = runtime_with(js).await;
410+
411+
let utf8_with_bom_and_invalid_chars = b"\xEF\xBB\xBFHello \xF0\x90\x80World";
412+
let input = Message::new_binary("encoded", utf8_with_bom_and_invalid_chars);
413+
let output = Message::new("decoded", "Hello �World");
414+
assert_eq!(
415+
script
416+
.on_message(&runtime, &DateTime::now(), &input)
417+
.await
418+
.unwrap(),
419+
vec![output]
420+
);
421+
}
422+
399423
#[tokio::test]
400424
async fn using_standard_built_in_objects() {
401425
let js = r#"

0 commit comments

Comments
 (0)