Skip to content

Commit 210b309

Browse files
authored
fix(ext/web): handle empty TextDecoder input when stream is true (#31691)
Follow-up to #31605 (comment). Fixes an issue where calling TextDecoder.decode() with stream: true and an empty chunk returned incorrect results, which broke chunked decoding for legacy encodings such as big5, shift_jis, and euc-kr.
1 parent 67ab6be commit 210b309

File tree

2 files changed

+57
-0
lines changed

2 files changed

+57
-0
lines changed

ext/web/08_text_encoding.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ class TextDecoder {
119119
stream = options.stream;
120120
}
121121

122+
if (stream && input.length === 0) {
123+
return "";
124+
}
125+
122126
try {
123127
/** @type {ArrayBufferLike} */
124128
let buffer = input;

tests/unit/text_encoding_test.ts

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,3 +324,56 @@ Deno.test(
324324
assertStrictEquals(cancelled, true);
325325
},
326326
);
327+
328+
Deno.test(
329+
"TextDecoder should handle empty chunk in stream mode (legacy encodings)",
330+
() => {
331+
// big5: [0xa4, 0xa4] => "中" (U+4E2D)
332+
{
333+
const u8 = new Uint8Array([0xa4, 0xa4]);
334+
const str = new TextDecoder("big5").decode(u8);
335+
assertEquals(str, "\u4e2d");
336+
337+
const d = new TextDecoder("big5");
338+
const chunks = [
339+
d.decode(u8.subarray(0, 1), { stream: true }),
340+
d.decode(u8.subarray(1), { stream: true }),
341+
d.decode(new Uint8Array(), { stream: true }),
342+
d.decode(),
343+
];
344+
assertEquals(chunks.join(""), str);
345+
}
346+
347+
// shift_jis: [0x82, 0xa0] => "あ" (U+3042)
348+
{
349+
const u8 = new Uint8Array([0x82, 0xa0]);
350+
const str = new TextDecoder("shift_jis").decode(u8);
351+
assertEquals(str, "\u3042");
352+
353+
const d = new TextDecoder("shift_jis");
354+
const chunks = [
355+
d.decode(u8.subarray(0, 1), { stream: true }),
356+
d.decode(u8.subarray(1), { stream: true }),
357+
d.decode(new Uint8Array(), { stream: true }),
358+
d.decode(),
359+
];
360+
assertEquals(chunks.join(""), str);
361+
}
362+
363+
// euc-kr: [0xb0, 0xa1] => "가" (U+AC00)
364+
{
365+
const u8 = new Uint8Array([0xb0, 0xa1]);
366+
const str = new TextDecoder("euc-kr").decode(u8);
367+
assertEquals(str, "\uac00");
368+
369+
const d = new TextDecoder("euc-kr");
370+
const chunks = [
371+
d.decode(u8.subarray(0, 1), { stream: true }),
372+
d.decode(u8.subarray(1), { stream: true }),
373+
d.decode(new Uint8Array(), { stream: true }),
374+
d.decode(),
375+
];
376+
assertEquals(chunks.join(""), str);
377+
}
378+
},
379+
);

0 commit comments

Comments
 (0)