Skip to content

Commit e69d089

Browse files
committed
test: test various utf8 codepaths on Node.js
1 parent ce478a7 commit e69d089

File tree

3 files changed

+314
-0
lines changed

3 files changed

+314
-0
lines changed

tests/utf8.hermes.test.cjs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Re-runs the shared utf8 suite with the native helpers removed and the Hermes marker flipped.
delete globalThis.TextDecoder
for (const helper of ['isWellFormed', 'toWellFormed']) delete String.prototype[helper]

const runningOnHermes = Boolean(globalThis.HermesInternal)
if (runningOnHermes) {
  // Test non-Hermes path on Hermes
  delete globalThis.HermesInternal
} else {
  // And Hermes path on non-Hermes
  globalThis.HermesInternal = true
}

// Must load only after the globals above were adjusted
require('./utf8.lib.test.js')

tests/utf8.lib.test.js

Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
1+
// Full copy of utf8.test.js, but importing directly from ./utf8.js, not following exports
2+
3+
import { utf8toString, utf8toStringLoose, utf8fromString, utf8fromStringLoose } from '../utf8.js'
4+
import { nativeDecoder } from '../fallback/_utils.js'
5+
import * as js from '../fallback/utf8.js'
6+
import { fromHex } from '@exodus/bytes/hex.js'
7+
import { randomValues } from '@exodus/crypto/randomBytes'
8+
import { describe, test } from 'node:test'
9+
10+
// invalid bytes -> string
// Each row is [input bytes, UTF-16 code units expected from loose decoding]
const nonUtf8 = [
  [
    [0, 254, 255],
    [0, 0xff_fd, 0xff_fd],
  ],
  [[0x80], [0xff_fd]],
  [[0xf0, 0x90, 0x80], [0xff_fd]], // https://npmjs.com/package/buffer is wrong
  [
    [0xf0, 0x80, 0x80],
    [0xff_fd, 0xff_fd, 0xff_fd],
  ], // https://github.com/nodejs/node/issues/16894
].map(([bytes, charcodes]) => ({ bytes, charcodes }))
17+
18+
// invalid string -> bytes
// Each row is [UTF-16 code units containing a lone surrogate, hex of the expected loose encoding]
const orphans = [
  [[0x61, 0x62, 0xd8_00, 0x77, 0x78], '6162efbfbd7778'],
  [[0xd8_00, 0xd8_00], 'efbfbdefbfbd'], // https://npmjs.com/package/buffer is wrong
  [[0x61, 0x62, 0xdf_ff, 0x77, 0x78], '6162efbfbd7778'],
  [[0xdf_ff, 0xd8_00], 'efbfbdefbfbd'],
].map(([charcodes, hex]) => ({ charcodes, hex }))
25+
26+
// Valid round-trip fixtures: runs of U+0080 (encoded as the two bytes c2 80) of several lengths
const fixtures = [1, 4, 8, 16].map((count) => ({
  charcodes: new Array(count).fill(0x80),
  hex: 'c280'.repeat(count),
}))
32+
33+
// Byte-array inputs shared by the 'random data' tests: fixed edge cases plus randomized slices
const seed = randomValues(5 * 1024)
const pool = [
  new Uint8Array(0),
  new Uint8Array(1),
  new Uint8Array(256),
  ...[1, 42, 0x80, 0xd0, 255].map((byte) => new Uint8Array(256).fill(byte)),
  Uint8Array.of(0xef, 0xbb, 0xbf), // BOM
  ...[1, 2, 3].map((trim) => seed.subarray(trim, -trim)),
]

// 500 random-length tails of the seed, each perturbed differently per round and per position
for (let round = 0; round < 500; round++) {
  const start = Math.floor(Math.random() * seed.length)
  const tail = seed.subarray(start)
  pool.push(tail.map((value, offset) => value + round * offset))
}

// Known-bad byte sequences and the loose encodings of orphaned surrogates join the pool too
pool.push(...nonUtf8.map(({ bytes }) => Uint8Array.from(bytes)))
pool.push(...orphans.map(({ hex }) => fromHex(hex)))

// Same pool with the high bit cleared, so every entry is pure ASCII
const poolAscii = pool.map((entry) => entry.map((value) => value & 0x7f))
57+
58+
// Capture whatever codecs globalThis exposes at load time; the tests below guard against missing ones
const TextDecoder = globalThis.TextDecoder
const TextEncoder = globalThis.TextEncoder
59+
60+
describe('utf8toString', () => {
  describe('invalid input', () => {
    // Nullish values, plain arrays, a non-byte typed array and a string must all throw
    for (const method of [utf8toString, utf8toStringLoose]) {
      test(method.name, (t) => {
        const rejected = [null, undefined, [], [1, 2], new Uint16Array(1), 'string']
        for (const input of rejected) t.assert.throws(() => method(input))
      })
    }
  })

  describe('valid input', () => {
    const decoders = [
      utf8toString,
      utf8toStringLoose,
      (x) => js.decode(x, false),
      (x) => js.decode(x, true),
    ]
    for (const method of decoders) {
      // The anonymous wrappers have an empty name, so their source text serves as the test title
      test(method.name || String(method), (t) => {
        for (const { charcodes, hex } of fixtures) {
          const decoded = method(fromHex(hex))
          t.assert.strictEqual(decoded, String.fromCharCode(...charcodes))
        }
      })
    }
  })

  test('non-utf8 bytes throw in utf8toString', (t) => {
    const spaces = (count) => new Uint8Array(count).fill(0x20)
    for (const method of [utf8toString, (x) => js.decode(x, false)]) {
      for (const { bytes } of nonUtf8) {
        const mustThrow = (...parts) => t.assert.throws(() => method(Uint8Array.of(...parts)))

        mustThrow(...bytes)
        // Pad with 0..129 ASCII spaces before, then after, the invalid sequence
        for (let p = 0; p < 130; p++) mustThrow(...spaces(p), ...bytes)
        for (let s = 0; s < 130; s++) mustThrow(...bytes, ...spaces(s))
      }
    }
  })

  test('non-utf8 bytes get replaced in utf8toStringLoose', (t) => {
    const spaces = (count) => new Uint8Array(count).fill(0x20)
    for (const method of [utf8toStringLoose, (x) => js.decode(x, true)]) {
      for (const { bytes, charcodes } of nonUtf8) {
        const replaced = String.fromCharCode(...charcodes)
        // Length is asserted first, then the exact content
        const mustDecodeTo = (input, expected) => {
          const res = method(input)
          t.assert.strictEqual(res.length, expected.length)
          t.assert.strictEqual(res, expected)
        }

        mustDecodeTo(Uint8Array.of(...bytes), replaced)
        // Pad with 0..129 ASCII spaces before, then after, the invalid sequence
        for (let p = 0; p < 130; p++) {
          mustDecodeTo(Uint8Array.of(...spaces(p), ...bytes), ' '.repeat(p) + replaced)
        }

        for (let s = 0; s < 130; s++) {
          mustDecodeTo(Uint8Array.of(...bytes, ...spaces(s)), replaced + ' '.repeat(s))
        }
      }
    }
  })
})
130+
131+
describe('utf8fromString', () => {
  describe('invalid input', () => {
    for (const method of [utf8fromString, utf8fromStringLoose]) {
      test(method.name, (t) => {
        const rejected = [null, undefined, [], [1, 2], ['00'], new Uint8Array()]
        for (const input of rejected) {
          // Must throw without a second argument and with each explicit output form
          const calls = [[input], [input, 'uint8'], [input, 'buffer'], [input, 'hex']]
          for (const args of calls) t.assert.throws(() => method(...args))
        }
      })
    }
  })

  describe('valid input', () => {
    const encoders = [
      utf8fromString,
      utf8fromStringLoose,
      (x) => js.encode(x, false),
      (x) => js.encode(x, true),
    ]
    for (const method of encoders) {
      // The anonymous wrappers have an empty name, so their source text serves as the test title
      test(method.name || String(method), (t) => {
        for (const { charcodes, hex } of fixtures) {
          const encoded = method(String.fromCharCode(...charcodes))
          t.assert.deepStrictEqual(encoded, fromHex(hex))
        }
      })
    }
  })

  test('orphans throw in utf8fromString', (t) => {
    for (const method of [utf8fromString, (s) => js.encode(s, false)]) {
      for (const { charcodes } of orphans) {
        const lonely = String.fromCharCode(...charcodes)
        t.assert.throws(() => method(lonely))
      }
    }
  })

  test('orphans get replaced in utf8fromStringLoose', (t) => {
    for (const method of [utf8fromStringLoose, (s) => js.encode(s, true)]) {
      for (const { charcodes, hex } of orphans) {
        const encoded = method(String.fromCharCode(...charcodes))
        t.assert.deepStrictEqual(encoded, fromHex(hex))
      }
    }
  })
})
176+
177+
describe('random data', () => {
  // State shared between the tests below. Later tests read what earlier ones pushed
  // (and assert the expected lengths), so they only pass when run together in declaration order.
  const strings = [] // utf8toStringLoose(pool[i])
  const stringsAscii = [] // utf8toString(poolAscii[i])
  const restored = [] // utf8fromString(strings[i])
  const ignoreBOM = true

  let nativeFatal = nativeDecoder
  if (nativeFatal) {
    try {
      // Non-fixed Node.js without ICU doesn't have 'fatal' option support
      new TextDecoder('utf8', { fatal: true }) // eslint-disable-line no-new
    } catch {
      nativeFatal = false
    }
  }

  // Strict platform decoder used as a reference, or null when it is not usable here
  const createFatalDecoder = () =>
    nativeFatal ? new TextDecoder('utf8', { fatal: true, ignoreBOM }) : null

  // Platform encoder used as a reference, or null when globalThis had none at load time
  const createEncoder = () => (TextEncoder ? new TextEncoder() : null)

  // Asserts that every decoder (library loose, both fallback modes, and the platform
  // TextDecoder / Buffer when present) turns `u8` into exactly `str`
  const assertDecodersAgree = (t, u8, str, textDecoder) => {
    t.assert.strictEqual(str, utf8toStringLoose(u8))
    t.assert.strictEqual(str, js.decode(u8, false))
    t.assert.strictEqual(str, js.decode(u8, true))
    if (textDecoder) t.assert.strictEqual(str, textDecoder.decode(u8))
    if (globalThis.Buffer) t.assert.strictEqual(str, Buffer.from(u8).toString())
  }

  // Asserts that every encoder (library loose, both fallback modes, and the platform
  // TextEncoder / Buffer when present) turns `str` into exactly the bytes of `u8`
  const assertEncodersAgree = (t, str, u8, textEncoder) => {
    t.assert.deepStrictEqual(u8, utf8fromStringLoose(str))
    t.assert.deepStrictEqual(u8, js.encode(str, false))
    t.assert.deepStrictEqual(u8, js.encode(str, true))
    if (textEncoder) t.assert.deepStrictEqual(u8, textEncoder.encode(str))
    // Loose deepEqual: a Buffer and a plain Uint8Array differ in prototype
    if (globalThis.Buffer) t.assert.deepEqual(u8, Buffer.from(str))
  }

  test('utf8toStringLoose', (t) => {
    const textDecoder = nativeDecoder ? new TextDecoder('utf8', { ignoreBOM }) : null // polyfilled might be wrong
    // NOTE(review): TYPED_ARRAY_SUPPORT presumably identifies the npm `buffer` polyfill, which is
    // excluded here because it disagrees on some invalid sequences - confirm
    const NativeBuffer = globalThis.Buffer && !globalThis.Buffer.TYPED_ARRAY_SUPPORT ? Buffer : null
    for (const u8 of pool) {
      const str = utf8toStringLoose(u8)
      t.assert.strictEqual(str, js.decode(u8, true))
      if (textDecoder) t.assert.strictEqual(str, textDecoder.decode(u8))
      if (NativeBuffer) t.assert.strictEqual(str, NativeBuffer.from(u8).toString())
      strings.push(str)
    }
  })

  test('utf8toString (ascii)', (t) => {
    const textDecoder = createFatalDecoder()
    for (const u8 of poolAscii) {
      const str = utf8toString(u8)
      assertDecodersAgree(t, u8, str, textDecoder)
      stringsAscii.push(str)
    }
  })

  test('utf8toString', (t) => {
    const textDecoder = createFatalDecoder()
    t.assert.strictEqual(strings.length, pool.length)
    for (let i = 0; i < pool.length; i++) {
      const u8 = pool[i]
      let str
      try {
        str = utf8toString(u8)
      } catch (e) {
        // A TypeError is the expected rejection of invalid bytes; anything else is a bug.
        // Attach the original error as `cause` so the real failure is not lost
        // (engines without support for the option simply ignore it).
        if (!(e instanceof TypeError)) throw new Error('Unexpected error', { cause: e })
      }

      if (str === undefined) {
        // Strict decoding failed, so the other strict decoders must reject the input as well
        t.assert.throws(() => js.decode(u8, false))
        if (textDecoder) t.assert.throws(() => textDecoder.decode(u8))
        continue
      }

      t.assert.strictEqual(str, strings[i])
      assertDecodersAgree(t, u8, str, textDecoder)
    }
  })

  test('utf8fromString (ascii)', (t) => {
    const textEncoder = createEncoder()
    t.assert.strictEqual(stringsAscii.length, poolAscii.length)
    for (let i = 0; i < poolAscii.length; i++) {
      const u8 = poolAscii[i]
      const str = stringsAscii[i]
      t.assert.deepStrictEqual(u8, utf8fromString(str))
      assertEncodersAgree(t, str, u8, textEncoder)
    }
  })

  test('utf8fromString / utf8fromStringLoose', (t) => {
    const textEncoder = createEncoder()
    t.assert.strictEqual(strings.length, pool.length)
    for (let i = 0; i < pool.length; i++) {
      const str = strings[i]
      const u8 = utf8fromString(str)
      assertEncodersAgree(t, str, u8, textEncoder)
      restored.push(u8)
    }
  })

  test('utf8toString / utf8toStringLoose', (t) => {
    const textDecoder = createFatalDecoder()
    t.assert.strictEqual(strings.length, pool.length)
    for (let i = 0; i < pool.length; i++) {
      const str = strings[i]
      const u8 = restored[i]
      t.assert.strictEqual(str, utf8toString(u8))
      assertDecodersAgree(t, u8, str, textDecoder)
    }
  })
})
289+
290+
// True on the test platforms for which the 'large strings' test is skipped
const skipLarge = ['quickjs', 'xs', 'engine262'].includes(process.env.EXODUS_TEST_PLATFORM)
294+
test('large strings', { skip: skipLarge }, (t) => {
  const chunk = 'abcde01234'
  const s = chunk.repeat(12e6) // 120e6 total
  // e.g. npmjs.com/buffer fails on this
  const roundTripped = utf8toString(utf8fromString(s))
  t.assert.strictEqual(s, roundTripped)
})

tests/utf8.noenc.test.cjs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Re-runs the shared utf8 suite with both platform text codecs removed
for (const codec of ['TextEncoder', 'TextDecoder']) delete globalThis[codec]

// Must load only after the globals above were removed
require('./utf8.lib.test.js')

0 commit comments

Comments
 (0)