|
1 | | -// Simplified version extracted from https://npmjs.com/package/@exodus/bytes codepath for 1-byte encodings |
2 | | -// Copyright Exodus Movement. Licensed under MIT License. |
3 | | - |
4 | 1 | 'use strict'; |
5 | 2 |
|
6 | 3 | const { |
7 | | - Array, |
8 | | - ArrayPrototypeFill, |
9 | | - ObjectKeys, |
10 | | - ObjectPrototypeHasOwnProperty, |
11 | 4 | SafeArrayIterator, |
12 | | - SafeMap, |
13 | 5 | SafeSet, |
14 | | - StringPrototypeIncludes, |
15 | | - TypedArrayFrom, |
16 | | - TypedArrayOf, |
17 | | - TypedArrayPrototypeIncludes, |
18 | | - TypedArrayPrototypeSet, |
19 | | - Uint16Array, |
20 | 6 | } = primordials; |
21 | 7 |
|
22 | | -const { isAscii } = require('buffer'); |
23 | | - |
24 | | -const { FastBuffer } = require('internal/buffer'); |
25 | | - |
26 | 8 | const { |
27 | 9 | ERR_ENCODING_NOT_SUPPORTED, |
28 | | - ERR_ENCODING_INVALID_ENCODED_DATA, |
29 | 10 | } = require('internal/errors').codes; |
30 | 11 |
|
31 | | -const isBigEndian = new FastBuffer(TypedArrayOf(Uint16Array, 258).buffer)[1] === 2; |
32 | | - |
33 | 12 | const it = (x) => new SafeArrayIterator(x); |
34 | 13 |
|
35 | | -/* fallback/single-byte.encodings.js */ |
36 | | - |
37 | | -const r = 0xfffd; |
38 | | -const e = (x) => it(ArrayPrototypeFill(new Array(x), 1)); |
39 | | -const h = (x) => it(ArrayPrototypeFill(new Array(x), r)); |
40 | | - |
41 | | -/* eslint-disable @stylistic/js/max-len */ |
42 | | - |
43 | | -// Index tables from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings |
44 | | -// Each table in the spec lists only mapping from byte 0x80 onwards, as below that they are all ASCII and mapped as identity |
45 | | -// Here, 0xfffd (replacement charcode) designates a hole (unmapped offset), as not all encodings map all offsets |
46 | | -// All other numbers are deltas from the last seen mapped value, starting with 0x7f (127, highest ASCII) |
47 | | -// Thus, [0x80, 0x81, , 0x83] is stored as [1, 1, r, 2] |
48 | | -// Truncation (length < 128) means that all remaining ones are mapped as identity (offset i => codepoint i), not unmapped |
49 | | -const encodings = { |
50 | | - '__proto__': null, |
51 | | - 'ibm866': [913, ...e(47), 8530, 1, 1, -145, 34, 61, 1, -12, -1, 14, -18, 6, 6, -1, -1, -75, 4, 32, -8, -16, -28, 60, 34, 1, -5, -6, 21, -3, -6, -16, 28, -5, 1, -4, 1, -12, -1, -6, 1, 24, -1, -82, -12, 124, -4, 8, 4, -16, -8512, ...e(15), -78, 80, -77, 80, -77, 80, -73, 80, -942, 8553, -8546, 8547, -260, -8306, 9468, -9472], |
52 | | - 'iso-8859-10': [...e(33), 100, 14, 16, 8, -2, 14, -143, 148, -43, 80, 6, 23, -208, 189, -32, -154, 85, 14, 16, 8, -2, 14, -128, 133, -43, 80, 6, 23, 7831, -7850, -32, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 1, 1, 117, 7, -121, 1, 1, 1, 146, -144, 154, -152, ...e(5), 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 1, 1, 86, 7, -90, 1, 1, 1, 115, -113, 123, -121, 1, 1, 1, 1, 58], |
53 | | - 'iso-8859-13': [...e(33), 8061, -8059, 1, 1, 8058, -8056, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, 1, 1, 1, 8041, -8039, 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 7835], |
54 | | - 'iso-8859-14': [...e(33), 7522, 1, -7520, 103, 1, 7423, -7523, 7641, -7639, 7641, -119, 231, -7749, 1, 202, 7334, 1, -7423, 1, 7455, 1, -7563, 7584, 43, -42, 44, -35, 147, -111, 1, -36, -7585, ...e(15), 165, -163, ...e(5), 7572, -7570, ...e(5), 153, -151, ...e(16), 134, -132, ...e(5), 7541, -7539, ...e(5), 122], |
55 | | - 'iso-8859-15': [...e(33), 1, 1, 1, 8201, -8199, 187, -185, 186, -184, ...e(10), 202, -200, 1, 1, 199, -197, 1, 1, 151, 1, 37], |
56 | | - 'iso-8859-16': [...e(33), 100, 1, 60, 8043, -142, -7870, -185, 186, -184, 367, -365, 206, -204, 205, 1, -203, 1, 91, 54, 59, 7840, -8039, 1, 199, -113, 268, -350, 151, 1, 37, 4, -188, 1, 1, 64, -62, 66, -64, ...e(9), 65, 51, -113, 1, 1, 124, -122, 132, 22, -151, 1, 1, 1, 60, 258, -315, 1, 1, 1, 33, -31, 35, -33, ...e(9), 34, 51, -82, 1, 1, 93, -91, 101, 22, -120, 1, 1, 1, 29, 258], |
57 | | - 'iso-8859-2': [...e(33), 100, 468, -407, -157, 153, 29, -179, 1, 184, -2, 6, 21, -204, 208, -2, -203, 85, 470, -409, -142, 138, 29, 364, -527, 169, -2, 6, 21, 355, -351, -2, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374], |
58 | | - 'iso-8859-3': [...e(33), 134, 434, -565, 1, r, 128, -125, 1, 136, 46, -64, 22, -135, r, 206, -203, 119, -117, 1, 1, 1, 112, -110, 1, 121, 46, -64, 22, -120, r, 191, -188, 1, 1, r, 2, 70, -2, -65, ...e(8), r, 2, 1, 1, 1, 76, -74, 1, 69, -67, 1, 1, 1, 144, -16, -125, 1, 1, 1, r, 2, 39, -2, -34, ...e(8), r, 2, 1, 1, 1, 45, -43, 1, 38, -36, 1, 1, 1, 113, -16, 380], |
59 | | - 'iso-8859-4': [...e(33), 100, 52, 30, -178, 132, 19, -148, 1, 184, -78, 16, 68, -185, 208, -206, 1, 85, 470, -388, -163, 117, 19, 395, -527, 169, -78, 16, 68, -29, 52, -51, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 92, -26, 53, 7, -22, -98, 1, 1, 1, 1, 154, -152, 1, 1, 140, 2, -139, 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 61, -26, 53, 7, -22, -67, 1, 1, 1, 1, 123, -121, 1, 1, 109, 2, 366], |
60 | | - 'iso-8859-5': [...e(33), 865, ...e(11), -863, 865, ...e(65), 7367, -7365, ...e(11), -949, 951, 1], |
61 | | - 'iso-8859-6': [...e(33), r, r, r, 4, ...h(7), 1384, -1375, ...h(13), 1390, r, r, r, 4, r, 2, ...e(25), r, r, r, r, r, 6, ...e(18), ...h(13)], |
62 | | - 'iso-8859-7': [...e(33), 8056, 1, -8054, 8201, 3, -8201, 1, 1, 1, 721, -719, 1, 1, r, 8040, -8037, 1, 1, 1, 721, 1, 1, -719, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r], |
63 | | - 'iso-8859-8': [...e(33), r, 2, ...e(7), 46, -44, ...e(14), 62, -60, 1, 1, 1, ...h(32), 8025, -6727, ...e(26), r, r, 6692, 1, r], |
64 | | - 'koi8-r': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 8450, ...e(14), -8544, 8545, ...e(10), -9411, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3], |
65 | | - 'koi8-u': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 3, 8448, -8446, 1, 8448, 1, 1, 1, 1, -8394, -51, 8448, 1, 1, 1, -8544, 3, 8543, -8541, 1, 8543, 1, 1, 1, 1, -8410, -130, -869, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3], |
66 | | - 'macintosh': [69, 1, 2, 2, 8, 5, 6, 5, -1, 2, 2, -1, 2, 2, 2, -1, 2, 1, 2, -1, 2, 1, 2, 2, -1, 2, 2, -1, 5, -1, 2, 1, 7972, -8048, -14, 1, 4, 8059, -8044, 41, -49, -5, 8313, -8302, -12, 8632, -8602, 18, 8518, -8557, 8627, 1, -8640, 16, 8525, 15, -2, -7759, 7787, -8577, 16, 751, -707, 18, -57, -30, 11, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 32, 3, 18, 125, 1, 7872, 1, 8, 1, -5, 1, -7970, 9427, -9419, 121, 7884, 104, -115, 1, 56007, 1, -56033, -8042, 8035, 4, 18, -8046, 8, -9, 10, -3, 5, 1, 1, -3, 7, 1, 63531, -63533, 8, 1, -2, 88, 405, 22, -557, 553, 1, 1, -546, 549, -2, -20], |
67 | | - 'windows-1250': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -7888, 7897, -7903, 10, 25, -4, -233, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8129, 7897, -7903, 10, 25, -4, -218, 551, 17, -407, -157, 96, -94, 1, 1, 1, 181, -179, 1, 1, 1, 205, -203, 1, 554, -409, -142, 1, 1, 1, 1, 77, 90, -164, 130, 416, -415, 62, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374], |
68 | | - 'windows-1251': [899, 1, 7191, -7111, 7115, 8, -6, 1, 139, -124, -7207, 7216, -7215, 2, -1, 4, 67, 7110, 1, 3, 1, 5, -15, 1, -8060, 8330, -7369, 7137, -7136, 2, -1, 4, -959, 878, 80, -86, -868, 1004, -1002, 1, 858, -856, 859, -857, 1, 1, 1, 857, -855, 1, 853, 80, 59, -988, 1, 1, 922, 7365, -7362, -921, 925, -83, 80, 2, -71, ...e(63)], |
69 | | - 'windows-1252': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 240, -238, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 225, -6], |
70 | | - 'windows-1253': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 1, 1, 1, 1, 741, 1, -739, 1, 1, 1, 1, 1, 1, r, 2, 1, 1, 1, 8039, -8037, 1, 1, 1, 721, -719, 1, 1, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r], |
71 | | - 'windows-1254': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 1, 218, -216, ...e(47), 79, -77, ...e(11), 84, 46, -127, ...e(16), 48, -46, ...e(11), 53, 46], |
72 | | - 'windows-1255': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -8094, ...e(7), 8199, -8197, 1, 1, 1, 1, 46, -44, ...e(14), 62, -60, 1, 1, 1, 1, 1265, ...e(19), 45, 1, 1, 1, 1, ...h(7), -36, ...e(26), r, r, 6692, 1, r], |
73 | | - 'windows-1256': [8237, -6702, 6556, -7816, 7820, 8, -6, 1, -7515, 7530, -6583, 6592, -7911, 1332, 18, -16, 39, 6505, 1, 3, 1, 5, -15, 1, -6507, 6777, -6801, 6569, -7911, 7865, 1, -6483, -1562, 1388, -1386, ...e(7), 1557, -1555, ...e(14), 1378, -1376, 1, 1, 1, 1377, 162, -160, ...e(21), -1375, 1376, 1, 1, 1, 6, 1, 1, 1, -1379, 1380, -1378, 1379, 1, 1, 1, -1377, 1, 1, 1, 1, 1374, 1, -1372, 1, 1372, 1, 1, 1, -1370, 1371, 1, -1369, 1370, -1368, 1369, -1367, 1, 7954, 1, -6461], |
74 | | - 'windows-1257': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 28, 543, -527, -40, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 19, 556, -572, 1, r, 2, 1, 1, r, 2, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, ...e(5), 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 347], |
75 | | - 'windows-1258': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -7911, -182, 1, 218, -216, ...e(34), 64, -62, ...e(7), 565, -563, 1, 1, 65, -63, 568, -566, 1, 204, -202, 1, 1, 1, 1, 1, 1, 211, 340, -548, 1, 1, 1, 33, -31, ...e(7), 534, -532, 1, 1, 34, -32, 562, -560, 1, 173, -171, 1, 1, 1, 1, 1, 1, 180, 7931], |
76 | | - 'windows-874': [8237, -8235, 1, 1, 1, 8098, -8096, ...e(10), 8072, 1, 3, 1, 5, -15, 1, -8060, ...e(8), 3425, ...e(57), r, r, r, r, 5, ...e(28), r, r, r, r], |
77 | | - 'x-mac-cyrillic': [913, ...e(31), 7153, -8048, 992, -1005, 4, 8059, -8044, 848, -856, -5, 8313, -7456, 80, 7694, -7773, 80, 7627, -8557, 8627, 1, -7695, -929, 988, -137, -4, 80, -77, 80, -78, 80, -79, 80, -2, -83, -857, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 875, 80, -79, 80, -7, 7102, 1, 8, 1, -5, 1, -7970, 7975, -7184, 80, -79, 80, 7351, -7445, 80, -2, -31, ...e(30), 7262], |
78 | | -}; |
79 | | - |
80 | | -/* eslint-enable @stylistic/js/max-len */ |
81 | | - |
82 | | -/* fallback/single-byte.js + single-byte.node.js, simplified */ |
83 | | - |
84 | | -const l256 = { __proto__: null, length: 256 }; |
85 | | - |
86 | | -function getEncoding(encoding) { |
87 | | - if (encoding === 'x-user-defined') { |
88 | | - // https://encoding.spec.whatwg.org/#x-user-defined-decoder, 14.5.1. x-user-defined decoder |
89 | | - return TypedArrayFrom(Uint16Array, l256, (_, i) => (i >= 0x80 ? 0xf700 + i : i)); |
90 | | - } |
91 | | - |
92 | | - if (!ObjectPrototypeHasOwnProperty(encodings, encoding)) { |
93 | | - throw new ERR_ENCODING_NOT_SUPPORTED(encoding); |
94 | | - } |
95 | | - |
96 | | - const map = TypedArrayFrom(Uint16Array, l256, (_, i) => i); // Unicode subset |
97 | | - let prev = 127; |
98 | | - map.set(TypedArrayFrom(Uint16Array, it(encodings[encoding]), (x) => (x === r ? x : (prev += x))), 128); |
99 | | - return map; |
100 | | -} |
101 | | - |
102 | | -const supported = new SafeSet(it(ObjectKeys(encodings))).add('iso-8859-8-i').add('x-user-defined'); |
| 14 | +const encodings = [ |
| 15 | + 'ibm866', |
| 16 | + 'koi8-r', |
| 17 | + 'koi8-u', |
| 18 | + 'macintosh', |
| 19 | + 'x-mac-cyrillic', |
| 20 | + 'iso-8859-2', |
| 21 | + 'iso-8859-3', |
| 22 | + 'iso-8859-4', |
| 23 | + 'iso-8859-5', |
| 24 | + 'iso-8859-6', |
| 25 | + 'iso-8859-7', |
| 26 | + 'iso-8859-8', |
| 27 | + 'iso-8859-10', |
| 28 | + 'iso-8859-13', |
| 29 | + 'iso-8859-14', |
| 30 | + 'iso-8859-15', |
| 31 | + 'iso-8859-16', |
| 32 | + 'windows-874', |
| 33 | + 'windows-1250', |
| 34 | + 'windows-1251', |
| 35 | + 'windows-1252', |
| 36 | + 'windows-1253', |
| 37 | + 'windows-1254', |
| 38 | + 'windows-1255', |
| 39 | + 'windows-1256', |
| 40 | + 'windows-1257', |
| 41 | + 'windows-1258', |
| 42 | +]; |
| 43 | + |
| 44 | +const { decodeSingleByte } = internalBinding('encoding_binding'); |
| 45 | + |
| 46 | +const supported = new SafeSet(it(encodings)).add('iso-8859-8-i').add('x-user-defined'); |
103 | 47 | const isSinglebyteEncoding = (enc) => supported.has(enc); |
104 | 48 |
|
105 | | -const decodersLoose = new SafeMap(); |
106 | | -const decodersFatal = new SafeMap(); |
107 | | - |
108 | 49 | function createSinglebyteDecoder(encoding, fatal) { |
109 | 50 | const id = encoding === 'iso-8859-8-i' ? 'iso-8859-8' : encoding; |
110 | | - const decoders = fatal ? decodersFatal : decodersLoose; |
111 | | - const cached = decoders.get(id); |
112 | | - if (cached) return cached; |
113 | | - |
114 | | - const map = getEncoding(id); |
115 | | - const incomplete = TypedArrayPrototypeIncludes(map, r); |
116 | | - |
117 | | - // Expects type-checked Buffer input |
118 | | - const decoder = (buf) => { |
119 | | - if (buf.byteLength === 0) return ''; |
120 | | - if (isAscii(buf)) return buf.latin1Slice(); // .latin1Slice is faster than .asciiSlice |
121 | | - const o = new Uint16Array(buf.length); |
122 | | - TypedArrayPrototypeSet(o, buf); // Copy to modify in-place, also those are 16-bit now |
123 | | - |
124 | | - let i = 0; |
125 | | - for (const end7 = o.length - 7; i < end7; i += 8) { |
126 | | - o[i] = map[o[i]]; |
127 | | - o[i + 1] = map[o[i + 1]]; |
128 | | - o[i + 2] = map[o[i + 2]]; |
129 | | - o[i + 3] = map[o[i + 3]]; |
130 | | - o[i + 4] = map[o[i + 4]]; |
131 | | - o[i + 5] = map[o[i + 5]]; |
132 | | - o[i + 6] = map[o[i + 6]]; |
133 | | - o[i + 7] = map[o[i + 7]]; |
134 | | - } |
135 | | - |
136 | | - for (const end = o.length; i < end; i++) o[i] = map[o[i]]; |
137 | | - |
138 | | - const b = new FastBuffer(o.buffer, o.byteOffset, o.byteLength); |
139 | | - if (isBigEndian) b.swap16(); |
140 | | - const string = b.ucs2Slice(); |
141 | | - if (fatal && incomplete && StringPrototypeIncludes(string, '\uFFFD')) { |
142 | | - throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined); |
143 | | - } |
144 | | - return string; |
145 | | - }; |
146 | | - |
147 | | - decoders.set(id, decoder); |
148 | | - return decoder; |
| 51 | + const key = id === 'x-user-defined' ? 27 : encodings.indexOf(id); |
| 52 | + if (key < 0) throw new ERR_ENCODING_NOT_SUPPORTED(encoding); |
| 53 | + return (buf) => decodeSingleByte(buf, key, fatal); |
149 | 54 | } |
150 | 55 |
|
151 | 56 | module.exports = { |
152 | 57 | isSinglebyteEncoding, |
153 | 58 | createSinglebyteDecoder, |
154 | | - getEncoding, // for tests |
155 | 59 | }; |
0 commit comments