Skip to content

Commit e1e1a11

Browse files
committed
perf: move all 1-byte encodings to native
1 parent efa5be8 commit e1e1a11

File tree

7 files changed

+435
-133
lines changed

7 files changed

+435
-133
lines changed

lib/internal/encoding.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ function makeTextDecoderICU() {
460460
validateDecoder(this);
461461
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
462462

463-
if (this[kMethod]) return this[kMethod](parseInput(input));
463+
if (this[kMethod]) return this[kMethod](input);
464464

465465
this[kUTF8FastPath] &&= !(options?.stream);
466466

Lines changed: 36 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -1,155 +1,59 @@
1-
// Simplified version extracted from https://npmjs.com/package/@exodus/bytes codepath for 1-byte encodings
2-
// Copyright Exodus Movement. Licensed under MIT License.
3-
41
'use strict';
52

63
const {
7-
Array,
8-
ArrayPrototypeFill,
9-
ObjectKeys,
10-
ObjectPrototypeHasOwnProperty,
114
SafeArrayIterator,
12-
SafeMap,
135
SafeSet,
14-
StringPrototypeIncludes,
15-
TypedArrayFrom,
16-
TypedArrayOf,
17-
TypedArrayPrototypeIncludes,
18-
TypedArrayPrototypeSet,
19-
Uint16Array,
206
} = primordials;
217

22-
const { isAscii } = require('buffer');
23-
24-
const { FastBuffer } = require('internal/buffer');
25-
268
const {
279
ERR_ENCODING_NOT_SUPPORTED,
28-
ERR_ENCODING_INVALID_ENCODED_DATA,
2910
} = require('internal/errors').codes;
3011

31-
const isBigEndian = new FastBuffer(TypedArrayOf(Uint16Array, 258).buffer)[1] === 2;
32-
3312
const it = (x) => new SafeArrayIterator(x);
3413

35-
/* fallback/single-byte.encodings.js */
36-
37-
const r = 0xfffd;
38-
const e = (x) => it(ArrayPrototypeFill(new Array(x), 1));
39-
const h = (x) => it(ArrayPrototypeFill(new Array(x), r));
40-
41-
/* eslint-disable @stylistic/js/max-len */
42-
43-
// Index tables from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings
44-
// Each table in the spec lists only mapping from byte 0x80 onwards, as below that they are all ASCII and mapped as idenity
45-
// Here, 0xfffd (replacement charcode) designates a hole (unmapped offset), as not all encodings map all offsets
46-
// All other numbers are deltas from the last seen mapped value, starting with 0x7f (127, highest ASCII)
47-
// Thus, [0x80, 0x81, , 0x83] is stored as [1, 1, r, 2]
48-
// Truncation (length < 128) means that all remaining ones are mapped as identity (offset i => codepoint i), not unmapped
49-
const encodings = {
50-
'__proto__': null,
51-
'ibm866': [913, ...e(47), 8530, 1, 1, -145, 34, 61, 1, -12, -1, 14, -18, 6, 6, -1, -1, -75, 4, 32, -8, -16, -28, 60, 34, 1, -5, -6, 21, -3, -6, -16, 28, -5, 1, -4, 1, -12, -1, -6, 1, 24, -1, -82, -12, 124, -4, 8, 4, -16, -8512, ...e(15), -78, 80, -77, 80, -77, 80, -73, 80, -942, 8553, -8546, 8547, -260, -8306, 9468, -9472],
52-
'iso-8859-10': [...e(33), 100, 14, 16, 8, -2, 14, -143, 148, -43, 80, 6, 23, -208, 189, -32, -154, 85, 14, 16, 8, -2, 14, -128, 133, -43, 80, 6, 23, 7831, -7850, -32, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 1, 1, 117, 7, -121, 1, 1, 1, 146, -144, 154, -152, ...e(5), 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 1, 1, 86, 7, -90, 1, 1, 1, 115, -113, 123, -121, 1, 1, 1, 1, 58],
53-
'iso-8859-13': [...e(33), 8061, -8059, 1, 1, 8058, -8056, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, 1, 1, 1, 8041, -8039, 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 7835],
54-
'iso-8859-14': [...e(33), 7522, 1, -7520, 103, 1, 7423, -7523, 7641, -7639, 7641, -119, 231, -7749, 1, 202, 7334, 1, -7423, 1, 7455, 1, -7563, 7584, 43, -42, 44, -35, 147, -111, 1, -36, -7585, ...e(15), 165, -163, ...e(5), 7572, -7570, ...e(5), 153, -151, ...e(16), 134, -132, ...e(5), 7541, -7539, ...e(5), 122],
55-
'iso-8859-15': [...e(33), 1, 1, 1, 8201, -8199, 187, -185, 186, -184, ...e(10), 202, -200, 1, 1, 199, -197, 1, 1, 151, 1, 37],
56-
'iso-8859-16': [...e(33), 100, 1, 60, 8043, -142, -7870, -185, 186, -184, 367, -365, 206, -204, 205, 1, -203, 1, 91, 54, 59, 7840, -8039, 1, 199, -113, 268, -350, 151, 1, 37, 4, -188, 1, 1, 64, -62, 66, -64, ...e(9), 65, 51, -113, 1, 1, 124, -122, 132, 22, -151, 1, 1, 1, 60, 258, -315, 1, 1, 1, 33, -31, 35, -33, ...e(9), 34, 51, -82, 1, 1, 93, -91, 101, 22, -120, 1, 1, 1, 29, 258],
57-
'iso-8859-2': [...e(33), 100, 468, -407, -157, 153, 29, -179, 1, 184, -2, 6, 21, -204, 208, -2, -203, 85, 470, -409, -142, 138, 29, 364, -527, 169, -2, 6, 21, 355, -351, -2, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374],
58-
'iso-8859-3': [...e(33), 134, 434, -565, 1, r, 128, -125, 1, 136, 46, -64, 22, -135, r, 206, -203, 119, -117, 1, 1, 1, 112, -110, 1, 121, 46, -64, 22, -120, r, 191, -188, 1, 1, r, 2, 70, -2, -65, ...e(8), r, 2, 1, 1, 1, 76, -74, 1, 69, -67, 1, 1, 1, 144, -16, -125, 1, 1, 1, r, 2, 39, -2, -34, ...e(8), r, 2, 1, 1, 1, 45, -43, 1, 38, -36, 1, 1, 1, 113, -16, 380],
59-
'iso-8859-4': [...e(33), 100, 52, 30, -178, 132, 19, -148, 1, 184, -78, 16, 68, -185, 208, -206, 1, 85, 470, -388, -163, 117, 19, 395, -527, 169, -78, 16, 68, -29, 52, -51, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 92, -26, 53, 7, -22, -98, 1, 1, 1, 1, 154, -152, 1, 1, 140, 2, -139, 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 61, -26, 53, 7, -22, -67, 1, 1, 1, 1, 123, -121, 1, 1, 109, 2, 366],
60-
'iso-8859-5': [...e(33), 865, ...e(11), -863, 865, ...e(65), 7367, -7365, ...e(11), -949, 951, 1],
61-
'iso-8859-6': [...e(33), r, r, r, 4, ...h(7), 1384, -1375, ...h(13), 1390, r, r, r, 4, r, 2, ...e(25), r, r, r, r, r, 6, ...e(18), ...h(13)],
62-
'iso-8859-7': [...e(33), 8056, 1, -8054, 8201, 3, -8201, 1, 1, 1, 721, -719, 1, 1, r, 8040, -8037, 1, 1, 1, 721, 1, 1, -719, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r],
63-
'iso-8859-8': [...e(33), r, 2, ...e(7), 46, -44, ...e(14), 62, -60, 1, 1, 1, ...h(32), 8025, -6727, ...e(26), r, r, 6692, 1, r],
64-
'koi8-r': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 8450, ...e(14), -8544, 8545, ...e(10), -9411, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3],
65-
'koi8-u': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 3, 8448, -8446, 1, 8448, 1, 1, 1, 1, -8394, -51, 8448, 1, 1, 1, -8544, 3, 8543, -8541, 1, 8543, 1, 1, 1, 1, -8410, -130, -869, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3],
66-
'macintosh': [69, 1, 2, 2, 8, 5, 6, 5, -1, 2, 2, -1, 2, 2, 2, -1, 2, 1, 2, -1, 2, 1, 2, 2, -1, 2, 2, -1, 5, -1, 2, 1, 7972, -8048, -14, 1, 4, 8059, -8044, 41, -49, -5, 8313, -8302, -12, 8632, -8602, 18, 8518, -8557, 8627, 1, -8640, 16, 8525, 15, -2, -7759, 7787, -8577, 16, 751, -707, 18, -57, -30, 11, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 32, 3, 18, 125, 1, 7872, 1, 8, 1, -5, 1, -7970, 9427, -9419, 121, 7884, 104, -115, 1, 56007, 1, -56033, -8042, 8035, 4, 18, -8046, 8, -9, 10, -3, 5, 1, 1, -3, 7, 1, 63531, -63533, 8, 1, -2, 88, 405, 22, -557, 553, 1, 1, -546, 549, -2, -20],
67-
'windows-1250': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -7888, 7897, -7903, 10, 25, -4, -233, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8129, 7897, -7903, 10, 25, -4, -218, 551, 17, -407, -157, 96, -94, 1, 1, 1, 181, -179, 1, 1, 1, 205, -203, 1, 554, -409, -142, 1, 1, 1, 1, 77, 90, -164, 130, 416, -415, 62, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374],
68-
'windows-1251': [899, 1, 7191, -7111, 7115, 8, -6, 1, 139, -124, -7207, 7216, -7215, 2, -1, 4, 67, 7110, 1, 3, 1, 5, -15, 1, -8060, 8330, -7369, 7137, -7136, 2, -1, 4, -959, 878, 80, -86, -868, 1004, -1002, 1, 858, -856, 859, -857, 1, 1, 1, 857, -855, 1, 853, 80, 59, -988, 1, 1, 922, 7365, -7362, -921, 925, -83, 80, 2, -71, ...e(63)],
69-
'windows-1252': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 240, -238, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 225, -6],
70-
'windows-1253': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 1, 1, 1, 1, 741, 1, -739, 1, 1, 1, 1, 1, 1, r, 2, 1, 1, 1, 8039, -8037, 1, 1, 1, 721, -719, 1, 1, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r],
71-
'windows-1254': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 1, 218, -216, ...e(47), 79, -77, ...e(11), 84, 46, -127, ...e(16), 48, -46, ...e(11), 53, 46],
72-
'windows-1255': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -8094, ...e(7), 8199, -8197, 1, 1, 1, 1, 46, -44, ...e(14), 62, -60, 1, 1, 1, 1, 1265, ...e(19), 45, 1, 1, 1, 1, ...h(7), -36, ...e(26), r, r, 6692, 1, r],
73-
'windows-1256': [8237, -6702, 6556, -7816, 7820, 8, -6, 1, -7515, 7530, -6583, 6592, -7911, 1332, 18, -16, 39, 6505, 1, 3, 1, 5, -15, 1, -6507, 6777, -6801, 6569, -7911, 7865, 1, -6483, -1562, 1388, -1386, ...e(7), 1557, -1555, ...e(14), 1378, -1376, 1, 1, 1, 1377, 162, -160, ...e(21), -1375, 1376, 1, 1, 1, 6, 1, 1, 1, -1379, 1380, -1378, 1379, 1, 1, 1, -1377, 1, 1, 1, 1, 1374, 1, -1372, 1, 1372, 1, 1, 1, -1370, 1371, 1, -1369, 1370, -1368, 1369, -1367, 1, 7954, 1, -6461],
74-
'windows-1257': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 28, 543, -527, -40, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 19, 556, -572, 1, r, 2, 1, 1, r, 2, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, ...e(5), 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 347],
75-
'windows-1258': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -7911, -182, 1, 218, -216, ...e(34), 64, -62, ...e(7), 565, -563, 1, 1, 65, -63, 568, -566, 1, 204, -202, 1, 1, 1, 1, 1, 1, 211, 340, -548, 1, 1, 1, 33, -31, ...e(7), 534, -532, 1, 1, 34, -32, 562, -560, 1, 173, -171, 1, 1, 1, 1, 1, 1, 180, 7931],
76-
'windows-874': [8237, -8235, 1, 1, 1, 8098, -8096, ...e(10), 8072, 1, 3, 1, 5, -15, 1, -8060, ...e(8), 3425, ...e(57), r, r, r, r, 5, ...e(28), r, r, r, r],
77-
'x-mac-cyrillic': [913, ...e(31), 7153, -8048, 992, -1005, 4, 8059, -8044, 848, -856, -5, 8313, -7456, 80, 7694, -7773, 80, 7627, -8557, 8627, 1, -7695, -929, 988, -137, -4, 80, -77, 80, -78, 80, -79, 80, -2, -83, -857, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 875, 80, -79, 80, -7, 7102, 1, 8, 1, -5, 1, -7970, 7975, -7184, 80, -79, 80, 7351, -7445, 80, -2, -31, ...e(30), 7262],
78-
};
79-
80-
/* eslint-enable @stylistic/js/max-len */
81-
82-
/* fallback/single-byte.js + single-byte.node.js, simplified */
83-
84-
const l256 = { __proto__: null, length: 256 };
85-
86-
function getEncoding(encoding) {
87-
if (encoding === 'x-user-defined') {
88-
// https://encoding.spec.whatwg.org/#x-user-defined-decoder, 14.5.1. x-user-defined decoder
89-
return TypedArrayFrom(Uint16Array, l256, (_, i) => (i >= 0x80 ? 0xf700 + i : i));
90-
}
91-
92-
if (!ObjectPrototypeHasOwnProperty(encodings, encoding)) {
93-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
94-
}
95-
96-
const map = TypedArrayFrom(Uint16Array, l256, (_, i) => i); // Unicode subset
97-
let prev = 127;
98-
map.set(TypedArrayFrom(Uint16Array, it(encodings[encoding]), (x) => (x === r ? x : (prev += x))), 128);
99-
return map;
100-
}
101-
102-
const supported = new SafeSet(it(ObjectKeys(encodings))).add('iso-8859-8-i').add('x-user-defined');
14+
// Names of the supported legacy single-byte encodings.
// The position of each name in this list is significant: createSinglebyteDecoder()
// uses the index of a name as the numeric key it hands to the native
// decodeSingleByte() binding, which in turn uses that key to select a row of
// its kSingleByteEncodings table. Reordering, inserting or removing entries
// here therefore changes which table a given label decodes with.
// NOTE(review): presumably this order must mirror the row order of
// kSingleByteEncodings in the native sources - confirm when editing.
// 'x-user-defined' is intentionally not listed; it is passed to the binding
// as key 27, which is the index one past the last entry of this list.
const encodings = [
15+
'ibm866',
16+
'iso-8859-10',
17+
'iso-8859-13',
18+
'iso-8859-14',
19+
'iso-8859-15',
20+
'iso-8859-16',
21+
'iso-8859-2',
22+
'iso-8859-3',
23+
'iso-8859-4',
24+
'iso-8859-5',
25+
'iso-8859-6',
26+
'iso-8859-7',
27+
'iso-8859-8',
28+
'koi8-r',
29+
'koi8-u',
30+
'macintosh',
31+
'windows-1250',
32+
'windows-1251',
33+
'windows-1252',
34+
'windows-1253',
35+
'windows-1254',
36+
'windows-1255',
37+
'windows-1256',
38+
'windows-1257',
39+
'windows-1258',
40+
'windows-874',
41+
'x-mac-cyrillic',
42+
];
43+
44+
const { decodeSingleByte } = internalBinding('encoding_binding');
45+
46+
const supported = new SafeSet(it(encodings)).add('iso-8859-8-i').add('x-user-defined');
10347
const isSinglebyteEncoding = (enc) => supported.has(enc);
10448

105-
const decodersLoose = new SafeMap();
106-
const decodersFatal = new SafeMap();
107-
10849
function createSinglebyteDecoder(encoding, fatal) {
10950
const id = encoding === 'iso-8859-8-i' ? 'iso-8859-8' : encoding;
110-
const decoders = fatal ? decodersFatal : decodersLoose;
111-
const cached = decoders.get(id);
112-
if (cached) return cached;
113-
114-
const map = getEncoding(id);
115-
const incomplete = TypedArrayPrototypeIncludes(map, r);
116-
117-
// Expects type-checked Buffer input
118-
const decoder = (buf) => {
119-
if (buf.byteLength === 0) return '';
120-
if (isAscii(buf)) return buf.latin1Slice(); // .latin1Slice is faster than .asciiSlice
121-
const o = new Uint16Array(buf.length);
122-
TypedArrayPrototypeSet(o, buf); // Copy to modify in-place, also those are 16-bit now
123-
124-
let i = 0;
125-
for (const end7 = o.length - 7; i < end7; i += 8) {
126-
o[i] = map[o[i]];
127-
o[i + 1] = map[o[i + 1]];
128-
o[i + 2] = map[o[i + 2]];
129-
o[i + 3] = map[o[i + 3]];
130-
o[i + 4] = map[o[i + 4]];
131-
o[i + 5] = map[o[i + 5]];
132-
o[i + 6] = map[o[i + 6]];
133-
o[i + 7] = map[o[i + 7]];
134-
}
135-
136-
for (const end = o.length; i < end; i++) o[i] = map[o[i]];
137-
138-
const b = new FastBuffer(o.buffer, o.byteOffset, o.byteLength);
139-
if (isBigEndian) b.swap16();
140-
const string = b.ucs2Slice();
141-
if (fatal && incomplete && StringPrototypeIncludes(string, '\uFFFD')) {
142-
throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined);
143-
}
144-
return string;
145-
};
146-
147-
decoders.set(id, decoder);
148-
return decoder;
51+
const key = id === 'x-user-defined' ? 27 : encodings.indexOf(id);
52+
if (key < 0) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
53+
return (buf) => decodeSingleByte(buf, key, fatal);
14954
}
15055

15156
module.exports = {
15257
isSinglebyteEncoding,
15358
createSinglebyteDecoder,
154-
getEncoding, // for tests
15559
};

src/encoding_binding.cc

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "encoding_binding.h"
2+
#include "encoding_singlebyte.h"
23
#include "ada.h"
34
#include "env-inl.h"
45
#include "node_errors.h"
@@ -379,6 +380,66 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
379380
}
380381
}
381382

383+
void BindingData::DecodeSingleByte(const FunctionCallbackInfo<Value>& args) {
384+
Environment* env = Environment::GetCurrent(args);
385+
386+
CHECK_GE(args.Length(), 2);
387+
388+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
389+
args[0]->IsArrayBufferView())) {
390+
return node::THROW_ERR_INVALID_ARG_TYPE(
391+
env->isolate(),
392+
"The \"list\" argument must be an instance of SharedArrayBuffer, "
393+
"ArrayBuffer or ArrayBufferView.");
394+
}
395+
396+
CHECK(args[1]->IsInt32());
397+
const int encoding = args[1].As<v8::Int32>()->Value();
398+
CHECK(encoding >= 0 && encoding < 28);
399+
400+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
401+
const uint8_t* data = buffer.data();
402+
size_t length = buffer.length();
403+
404+
if (length == 0) return args.GetReturnValue().SetEmptyString();
405+
406+
if (simdutf::validate_ascii(reinterpret_cast<const char*>(data), length)) {
407+
Local<Value> ret;
408+
if (StringBytes::Encode(env->isolate(), reinterpret_cast<const char*>(data), length, LATIN1).ToLocal(&ret)) {
409+
args.GetReturnValue().Set(ret);
410+
}
411+
return;
412+
}
413+
414+
uint16_t* dst = node::UncheckedMalloc<uint16_t>(length);
415+
416+
if (encoding == 27) {
417+
// x-user-defined
418+
for (size_t i = 0; i < length; i++) dst[i] = data[i] >= 0x80 ? data[i] + 0xf700 : data[i];
419+
} else {
420+
bool has_fatal = args[2]->IsTrue();
421+
422+
// TODO: make this static
423+
uint16_t table[256] = { 0 };
424+
const uint16_t* subtable = kSingleByteEncodings[encoding];
425+
for (size_t i = 0; i < 128; i++) table[i] = i;
426+
memcpy(table + 128, subtable, 256);
427+
428+
for (size_t i = 0; i < length; i++) dst[i] = table[data[i]];
429+
430+
// TODO: (1) cache find(subtable) statically, (2) is std:find or string contains faster here?
431+
if (has_fatal && std::find(subtable, subtable + 128, 0xfffd) != subtable + 128 && std::find(dst, dst + length, 0xfffd) != dst + length) {
432+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
433+
env->isolate(), "The encoded data was not valid for this encoding");
434+
}
435+
}
436+
437+
Local<Value> ret;
438+
if (StringBytes::Raw(env->isolate(), reinterpret_cast<uint16_t*>(dst), length).ToLocal(&ret)) {
439+
args.GetReturnValue().Set(ret);
440+
}
441+
}
442+
382443
void BindingData::ToASCII(const FunctionCallbackInfo<Value>& args) {
383444
Environment* env = Environment::GetCurrent(args);
384445
CHECK_GE(args.Length(), 1);
@@ -411,6 +472,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
411472
SetMethod(isolate, target, "encodeInto", EncodeInto);
412473
SetMethodNoSideEffect(isolate, target, "encodeUtf8String", EncodeUtf8String);
413474
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
475+
SetMethodNoSideEffect(isolate, target, "decodeSingleByte", DecodeSingleByte);
414476
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
415477
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
416478
}
@@ -428,6 +490,7 @@ void BindingData::RegisterTimerExternalReferences(
428490
registry->Register(EncodeInto);
429491
registry->Register(EncodeUtf8String);
430492
registry->Register(DecodeUTF8);
493+
registry->Register(DecodeSingleByte);
431494
registry->Register(ToASCII);
432495
registry->Register(ToUnicode);
433496
}

src/encoding_binding.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
3131
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
3232
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
3333
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
34+
static void DecodeSingleByte(const v8::FunctionCallbackInfo<v8::Value>& args);
3435

3536
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
3637
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);

0 commit comments

Comments
 (0)