Skip to content

Commit 6a71e8f

Browse files
authored
Merge pull request #1140 from Dig-Doug/improve-decoding-speed
Improves the utf8 decoder performance
2 parents: cc856f3 + 55b05b9 · commit 6a71e8f

File tree

3 files changed

+8411
-8103
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,6 @@ esm/
6363
package-lock.json
6464

6565
wasm_test_dir
66+
67+
# IntelliJ
68+
.idea/

packages/utf8/src/decoder.js

Lines changed: 23 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -21,66 +21,47 @@ export function decode(bytes) {
2121
}
2222

2323
function _decode(bytes) {
24-
if (bytes.length === 0) {
25-
return [];
26-
}
27-
28-
/**
29-
* 1 byte
30-
*/
31-
{
32-
const [b1, ...bs] = bytes;
33-
24+
const result = [];
25+
while (bytes.length > 0) {
26+
const b1 = bytes[0];
3427
if (b1 < 0x80) {
35-
return [code(0x0, b1), ..._decode(bs)];
28+
result.push(code(0x0, b1));
29+
bytes = bytes.slice(1);
30+
continue;
3631
}
3732

3833
if (b1 < 0xc0) {
3934
throw new Error("invalid UTF-8 encoding");
4035
}
41-
}
42-
43-
/**
44-
* 2 bytes
45-
*/
46-
{
47-
const [b1, b2, ...bs] = bytes;
4836

37+
const b2 = bytes[1];
4938
if (b1 < 0xe0) {
50-
return [code(0x80, ((b1 & 0x1f) << 6) + con(b2)), ..._decode(bs)];
39+
result.push(code(0x80, ((b1 & 0x1f) << 6) + con(b2)));
40+
bytes = bytes.slice(2);
41+
continue;
5142
}
52-
}
53-
54-
/**
55-
* 3 bytes
56-
*/
57-
{
58-
const [b1, b2, b3, ...bs] = bytes;
5943

44+
const b3 = bytes[2];
6045
if (b1 < 0xf0) {
61-
return [
62-
code(0x800, ((b1 & 0x0f) << 12) + (con(b2) << 6) + con(b3)),
63-
..._decode(bs),
64-
];
46+
result.push(code(0x800, ((b1 & 0x0f) << 12) + (con(b2) << 6) + con(b3)));
47+
bytes = bytes.slice(3);
48+
continue;
6549
}
66-
}
67-
68-
/**
69-
* 4 bytes
70-
*/
71-
{
72-
const [b1, b2, b3, b4, ...bs] = bytes;
7350

51+
const b4 = bytes[3];
7452
if (b1 < 0xf8) {
75-
return [
53+
result.push(
7654
code(
7755
0x10000,
7856
((((b1 & 0x07) << 18) + con(b2)) << 12) + (con(b3) << 6) + con(b4)
79-
),
80-
..._decode(bs),
81-
];
57+
)
58+
);
59+
bytes = bytes.slice(4);
60+
continue;
8261
}
62+
63+
throw new Error("invalid UTF-8 encoding");
8364
}
8465

85-
throw new Error("invalid UTF-8 encoding");
66+
return result;
8667
}

0 commit comments

Comments
 (0)