Skip to content

Commit 08cf981

Browse files
committed
test: test various utf8 codepaths on Node.js
1 parent ce478a7 commit 08cf981

File tree

3 files changed

+319
-0
lines changed

3 files changed

+319
-0
lines changed

tests/utf8.hermes.test.cjs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
// Entry script: strips selected built-ins from the global environment and
// flips the Hermes marker, then loads the shared UTF-8 test suite so it runs
// under that modified environment.
delete globalThis.TextDecoder
delete String.prototype.isWellFormed
delete String.prototype.toWellFormed

if (globalThis.HermesInternal) {
  // Test non-Hermes path on Hermes
  delete globalThis.HermesInternal
} else {
  // And Hermes path on non-Hermes
  globalThis.HermesInternal = true
}

// Must come last: the globals above have to be altered before the suite
// (and whatever it imports) is evaluated.
require('./utf8.lib.test.js')

tests/utf8.lib.test.js

Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
// Full copy of utf8.test.js, but importing directly from ./utf8.js, not following exports
2+
3+
import {
4+
utf8toString,
5+
utf8toStringLoose,
6+
utf8fromString,
7+
utf8fromStringLoose,
8+
} from '../utf8.js'
9+
import { nativeDecoder } from '../fallback/_utils.js'
10+
import * as js from '../fallback/utf8.js'
11+
import { fromHex } from '@exodus/bytes/hex.js'
12+
import { randomValues } from '@exodus/crypto/randomBytes'
13+
import { describe, test } from 'node:test'
14+
// invalid bytes -> string
// Each entry pairs a byte sequence that is not valid UTF-8 with the UTF-16
// code units expected from replacement-mode decoding (0xFFFD = U+FFFD).
const nonUtf8 = [
  { bytes: [0, 254, 255], charcodes: [0, 0xff_fd, 0xff_fd] },
  { bytes: [0x80], charcodes: [0xff_fd] },
  { bytes: [0xf0, 0x90, 0x80], charcodes: [0xff_fd] }, // https://npmjs.com/package/buffer is wrong
  { bytes: [0xf0, 0x80, 0x80], charcodes: [0xff_fd, 0xff_fd, 0xff_fd] }, // https://github.com/nodejs/node/issues/16894
]
22+
// invalid string -> bytes
// Each entry pairs UTF-16 code units containing unpaired surrogates with the
// hex of the expected replacement-mode encoding (efbfbd = U+FFFD in UTF-8).
const orphans = [
  { charcodes: [0x61, 0x62, 0xd8_00, 0x77, 0x78], hex: '6162efbfbd7778' },
  { charcodes: [0xd8_00, 0xd8_00], hex: 'efbfbdefbfbd' }, // https://npmjs.com/package/buffer is wrong
  { charcodes: [0x61, 0x62, 0xdf_ff, 0x77, 0x78], hex: '6162efbfbd7778' },
  { charcodes: [0xdf_ff, 0xd8_00], hex: 'efbfbdefbfbd' },
]
30+
// Valid round-trip fixtures: runs of U+0080 (two bytes each in UTF-8, `c280`)
// at several lengths. Generated from the length list so that the code units
// and the hex can never drift apart.
const fixtures = [1, 4, 8, 16].map((length) => ({
  charcodes: Array.from({ length }, () => 0x80),
  hex: 'c280'.repeat(length),
}))
37+
// Byte pool used by the 'random data' tests: a handful of fixed edge-case
// buffers, slices of a random seed, 500 derived pseudo-random buffers, and the
// raw bytes of the invalid-input tables above.
const seed = randomValues(5 * 1024)
const pool = [
  new Uint8Array(0),
  new Uint8Array(1),
  new Uint8Array(256),
  new Uint8Array(256).fill(1),
  new Uint8Array(256).fill(42),
  new Uint8Array(256).fill(0x80),
  new Uint8Array(256).fill(0xd0),
  new Uint8Array(256).fill(255),
  Uint8Array.of(0xef, 0xbb, 0xbf), // BOM
  seed.subarray(1, -1),
  seed.subarray(2, -2),
  seed.subarray(3, -3),
]

for (let i = 0; i < 500; i++) {
  // Random-offset tail of the seed, perturbed per position so every iteration
  // yields different content (assumes `seed` is a typed array whose `.map()`
  // returns a fresh typed array -- TODO confirm against randomValues)
  pool.push(seed.subarray(Math.floor(Math.random() * seed.length)).map((x, j) => x + i * j))
}

for (const { bytes } of nonUtf8) pool.push(Uint8Array.from(bytes))
for (const { hex } of orphans) pool.push(fromHex(hex))

// Same pool with the high bit of every byte cleared, i.e. all values <= 0x7f
const poolAscii = pool.map((u8) => u8.map((x) => x & 0x7f))

// Captured once at module load; either may be undefined if an entry script
// deleted it from globalThis before loading this file
const { TextDecoder, TextEncoder } = globalThis
64+
// Decoding tests: bytes -> string, for the public strict/loose functions and
// for the JS fallback (`js.decode(bytes, loose)`).
describe('utf8toString', () => {
  describe('invalid input', () => {
    for (const method of [utf8toString, utf8toStringLoose]) {
      test(method.name, (t) => {
        // Anything that is not a Uint8Array must be rejected
        for (const input of [null, undefined, [], [1, 2], new Uint16Array(1), 'string']) {
          t.assert.throws(() => method(input))
        }
      })
    }
  })

  describe('valid input', () => {
    for (const method of [
      utf8toString,
      utf8toStringLoose,
      (x) => js.decode(x, false),
      (x) => js.decode(x, true),
    ]) {
      // The arrow wrappers have an empty `name`, so their source text is used as the test title
      test(method.name || String(method), (t) => {
        for (const { charcodes, hex } of fixtures) {
          t.assert.strictEqual(method(fromHex(hex)), String.fromCharCode(...charcodes))
        }
      })
    }
  })

  test('non-utf8 bytes throw in utf8toString', (t) => {
    for (const method of [utf8toString, (x) => js.decode(x, false)]) {
      for (const { bytes } of nonUtf8) {
        t.assert.throws(() => method(Uint8Array.of(...bytes)))

        // Shift the invalid bytes through 0..129 bytes of leading ASCII padding
        for (let p = 0; p < 130; p++) {
          const prefixBytes = new Uint8Array(p).fill(0x20)
          t.assert.throws(() => method(Uint8Array.of(...prefixBytes, ...bytes)))
        }

        // Same with trailing ASCII padding
        for (let s = 0; s < 130; s++) {
          const suffixBytes = new Uint8Array(s).fill(0x20)
          t.assert.throws(() => method(Uint8Array.of(...bytes, ...suffixBytes)))
        }
      }
    }
  })

  test('non-utf8 bytes get replaced in utf8toStringLoose', (t) => {
    for (const method of [utf8toStringLoose, (x) => js.decode(x, true)]) {
      for (const { bytes, charcodes } of nonUtf8) {
        const expected = String.fromCharCode(...charcodes)

        const res = method(Uint8Array.of(...bytes))
        t.assert.strictEqual(res.length, charcodes.length)
        t.assert.strictEqual(res, expected)

        // Leading ASCII padding must pass through untouched
        for (let p = 0; p < 130; p++) {
          const prefixBytes = new Uint8Array(p).fill(0x20)
          const prefixString = ' '.repeat(p)
          const prefixed = method(Uint8Array.of(...prefixBytes, ...bytes))
          t.assert.strictEqual(prefixed.length, p + charcodes.length)
          t.assert.strictEqual(prefixed, prefixString + expected)
        }

        // Trailing ASCII padding must pass through untouched
        for (let s = 0; s < 130; s++) {
          const suffixBytes = new Uint8Array(s).fill(0x20)
          const suffixString = ' '.repeat(s)
          const suffixed = method(Uint8Array.of(...bytes, ...suffixBytes))
          t.assert.strictEqual(suffixed.length, charcodes.length + s)
          t.assert.strictEqual(suffixed, expected + suffixString)
        }
      }
    }
  })
})
135+
// Encoding tests: string -> bytes, for the public strict/loose functions and
// for the JS fallback (`js.encode(string, loose)`).
describe('utf8fromString', () => {
  describe('invalid input', () => {
    for (const method of [utf8fromString, utf8fromStringLoose]) {
      test(method.name, (t) => {
        // Non-string arguments must be rejected, with or without an explicit output form
        for (const input of [null, undefined, [], [1, 2], ['00'], new Uint8Array()]) {
          t.assert.throws(() => method(input))
          for (const form of ['uint8', 'buffer', 'hex']) {
            t.assert.throws(() => method(input, form))
          }
        }
      })
    }
  })

  describe('valid input', () => {
    for (const method of [
      utf8fromString,
      utf8fromStringLoose,
      (x) => js.encode(x, false),
      (x) => js.encode(x, true),
    ]) {
      // The arrow wrappers have an empty `name`, so their source text is used as the test title
      test(method.name || String(method), (t) => {
        for (const { charcodes, hex } of fixtures) {
          t.assert.deepStrictEqual(method(String.fromCharCode(...charcodes)), fromHex(hex))
        }
      })
    }
  })

  test('orphans throw in utf8fromString', (t) => {
    for (const method of [utf8fromString, (s) => js.encode(s, false)]) {
      for (const { charcodes } of orphans) {
        t.assert.throws(() => method(String.fromCharCode(...charcodes)))
      }
    }
  })

  test('orphans get replaced in utf8fromStringLoose', (t) => {
    for (const method of [utf8fromStringLoose, (s) => js.encode(s, true)]) {
      for (const { charcodes, hex } of orphans) {
        t.assert.deepStrictEqual(method(String.fromCharCode(...charcodes)), fromHex(hex))
      }
    }
  })
})
181+
// Cross-checks the library against its JS fallback and, where available, the
// platform TextDecoder / TextEncoder / Buffer, over the shared byte pools.
//
// NOTE: the tests in this suite are order-dependent. `strings`, `stringsAscii`
// and `restored` are filled by earlier tests and consumed by later ones; the
// length assertions at the top of the consumers catch a skipped producer.
describe('random data', () => {
  const strings = [] // utf8toStringLoose(pool[i]), filled by 'utf8toStringLoose'
  const stringsAscii = [] // utf8toString(poolAscii[i]), filled by 'utf8toString (ascii)'
  const restored = [] // utf8fromString(strings[i]), filled by 'utf8fromString / utf8fromStringLoose'
  const ignoreBOM = true

  // Whether a platform TextDecoder with working `fatal: true` can serve as a reference
  let nativeFatal = nativeDecoder
  if (nativeFatal) {
    try {
      // Non-fixed Node.js without ICU doesn't have 'fatal' option support
      new TextDecoder('utf8', { fatal: true }) // eslint-disable-line no-new
    } catch {
      nativeFatal = false
    }
  }

  test('utf8toStringLoose', (t) => {
    const textDecoder = nativeDecoder ? new TextDecoder('utf8', { ignoreBOM }) : null // polyfilled might be wrong
    // Only compare against Buffer when it does not expose TYPED_ARRAY_SUPPORT
    // (presumably a marker of the npm `buffer` polyfill -- TODO confirm)
    const NativeBuffer = globalThis.Buffer && !globalThis.Buffer.TYPED_ARRAY_SUPPORT ? Buffer : null
    for (const u8 of pool) {
      const str = utf8toStringLoose(u8)
      t.assert.strictEqual(str, js.decode(u8, true))
      if (textDecoder) t.assert.strictEqual(str, textDecoder.decode(u8))
      if (NativeBuffer) t.assert.strictEqual(str, NativeBuffer.from(u8).toString())
      strings.push(str)
    }
  })

  test('utf8toString (ascii)', (t) => {
    const textDecoder = nativeFatal ? new TextDecoder('utf8', { fatal: true, ignoreBOM }) : null
    for (const u8 of poolAscii) {
      const str = utf8toString(u8)
      t.assert.strictEqual(str, utf8toStringLoose(u8))
      t.assert.strictEqual(str, js.decode(u8, false))
      t.assert.strictEqual(str, js.decode(u8, true))
      if (textDecoder) t.assert.strictEqual(str, textDecoder.decode(u8))
      if (globalThis.Buffer) t.assert.strictEqual(str, Buffer.from(u8).toString())
      stringsAscii.push(str)
    }
  })

  test('utf8toString', (t) => {
    const textDecoder = nativeFatal ? new TextDecoder('utf8', { fatal: true, ignoreBOM }) : null
    t.assert.strictEqual(strings.length, pool.length)
    for (let i = 0; i < pool.length; i++) {
      const u8 = pool[i]
      let str
      try {
        str = utf8toString(u8)
      } catch (e) {
        // A TypeError is the accepted "invalid input" signal; anything else is a
        // real failure, so keep the original error attached for diagnosis
        if (!(e instanceof TypeError)) throw new Error('Unexpected error', { cause: e })
      }

      if (str === undefined) {
        // Strict decode rejected the input: every strict reference must reject it too
        t.assert.throws(() => js.decode(u8, false))
        if (textDecoder) t.assert.throws(() => textDecoder.decode(u8))
      } else {
        // Strict decode accepted the input: it must agree with every other decoder
        t.assert.strictEqual(str, strings[i])
        t.assert.strictEqual(str, utf8toStringLoose(u8))
        t.assert.strictEqual(str, js.decode(u8, false))
        t.assert.strictEqual(str, js.decode(u8, true))
        if (textDecoder) t.assert.strictEqual(str, textDecoder.decode(u8))
        if (globalThis.Buffer) t.assert.strictEqual(str, Buffer.from(u8).toString())
      }
    }
  })

  test('utf8fromString (ascii)', (t) => {
    const textEncoder = TextEncoder ? new TextEncoder() : null
    t.assert.strictEqual(stringsAscii.length, poolAscii.length)
    for (let i = 0; i < poolAscii.length; i++) {
      const u8 = poolAscii[i]
      const str = stringsAscii[i]
      t.assert.deepStrictEqual(u8, utf8fromString(str))
      t.assert.deepStrictEqual(u8, utf8fromStringLoose(str))
      t.assert.deepStrictEqual(u8, js.encode(str, false))
      t.assert.deepStrictEqual(u8, js.encode(str, true))
      if (textEncoder) t.assert.deepStrictEqual(u8, textEncoder.encode(str))
      // Loose deepEqual: a Buffer and a plain Uint8Array have different prototypes
      if (globalThis.Buffer) t.assert.deepEqual(u8, Buffer.from(str))
    }
  })

  test('utf8fromString / utf8fromStringLoose', (t) => {
    const textEncoder = TextEncoder ? new TextEncoder() : null
    t.assert.strictEqual(strings.length, pool.length)
    for (let i = 0; i < pool.length; i++) {
      const str = strings[i]
      const u8 = utf8fromString(str)
      t.assert.deepStrictEqual(u8, utf8fromStringLoose(str))
      t.assert.deepStrictEqual(u8, js.encode(str, false))
      t.assert.deepStrictEqual(u8, js.encode(str, true))
      if (textEncoder) t.assert.deepStrictEqual(u8, textEncoder.encode(str))
      // Loose deepEqual: a Buffer and a plain Uint8Array have different prototypes
      if (globalThis.Buffer) t.assert.deepEqual(u8, Buffer.from(str))
      restored.push(u8)
    }
  })

  test('utf8toString / utf8toStringLoose', (t) => {
    const textDecoder = nativeFatal ? new TextDecoder('utf8', { fatal: true, ignoreBOM }) : null
    t.assert.strictEqual(strings.length, pool.length)
    for (let i = 0; i < pool.length; i++) {
      // Re-encoded bytes must decode back to the same string with every decoder
      const str = strings[i]
      const u8 = restored[i]
      t.assert.strictEqual(str, utf8toString(u8))
      t.assert.strictEqual(str, utf8toStringLoose(u8))
      t.assert.strictEqual(str, js.decode(u8, false))
      t.assert.strictEqual(str, js.decode(u8, true))
      if (textDecoder) t.assert.strictEqual(str, textDecoder.decode(u8))
      if (globalThis.Buffer) t.assert.strictEqual(str, Buffer.from(u8).toString())
    }
  })
})
294+
// The 120e6-character round trip is skipped on the platforms listed here
const skipLarge = ['quickjs', 'xs', 'engine262'].includes(process.env.EXODUS_TEST_PLATFORM)
test('large strings', { skip: skipLarge }, (t) => {
  const s = 'abcde01234'.repeat(12e6) // 120e6 total
  // e.g. npmjs.com/buffer fails on this
  t.assert.strictEqual(s, utf8toString(utf8fromString(s)))
})

tests/utf8.noenc.test.cjs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
// Entry script: removes the platform TextEncoder/TextDecoder, then loads the
// shared UTF-8 test suite so it runs without them.
delete globalThis.TextEncoder
delete globalThis.TextDecoder
// Must come last: the globals have to be gone before the suite is evaluated
require('./utf8.lib.test.js')

0 commit comments

Comments
 (0)