Skip to content

Commit 3c3fd8d

Browse files
committed
feat(fs): add Node.js normalizeEncoding for robust encoding handling
Add a normalizeEncoding function, based on the Node.js internal/util.js implementation, to normalize variations in encoding names.

- Add a slowCases() helper that handles the less common encoding variants.
- Add normalizeEncoding(), which handles the common cases (utf8, utf-8, null/undefined) on a fast path and delegates everything else to slowCases().
- Integrate normalizeEncoding() into safeReadFile and safeReadFileSync.
- Handle uppercase, mixed-case, hyphenated, and aliased encoding names (e.g. UTF-8, utf-8, UTF8, LATIN1, latin1, binary, ucs2).
- Fall back to 'utf8' when an encoding name is not recognized.

This keeps encoding handling consistent with the name normalization used by Node.js core.

Based on: https://github.com/nodejs/node/blob/ae62b36d442b7bf987e85ae6e0df0f02cc1bb17f/lib/internal/util.js#L247-L310
1 parent 418ff97 commit 3c3fd8d

File tree

1 file changed

+144
-2
lines changed

1 file changed

+144
-2
lines changed

src/fs.ts

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,144 @@ function getPath() {
356356
return _path as typeof import('path')
357357
}
358358

359+
/**
 * Resolve the less common spellings of an encoding name (upper/mixed case,
 * hyphenated forms, and aliases such as `ucs2` or `binary`) to the canonical
 * `BufferEncoding` value.
 *
 * Kept separate from `normalizeEncoding` so that the caller's fast path stays
 * tiny and only unusual inputs pay for the case-insensitive comparison.
 *
 * Based on Node.js internal/util.js normalizeEncoding implementation.
 * @see https://github.com/nodejs/node/blob/ae62b36d442b7bf987e85ae6e0df0f02cc1bb17f/lib/internal/util.js#L247-L310
 *
 * @param enc - Encoding name to resolve
 * @returns The canonical encoding, `'utf8'` for the empty string, or
 *   `undefined` when the name is not recognized
 * @private
 */
/*@__NO_SIDE_EFFECTS__*/
function slowCases(enc: string): BufferEncoding | undefined {
  // Every recognized name is between 3 and 9 characters long. Anything outside
  // that range is rejected up front without lowercasing; the empty string is
  // the single exception and maps to the default encoding.
  switch (enc.length) {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
    case 8:
    case 9:
      break
    default:
      return enc === '' ? 'utf8' : undefined
  }

  // Compare case-insensitively and group aliases by their canonical name.
  switch (enc.toLowerCase()) {
    case 'utf8':
    case 'utf-8':
      return 'utf8'
    case 'ucs2':
    case 'ucs-2':
    case 'utf16le':
    case 'utf-16le':
      return 'utf16le'
    case 'latin1':
    case 'binary':
      return 'latin1'
    case 'hex':
      return 'hex'
    case 'ascii':
      return 'ascii'
    case 'base64':
      return 'base64'
    case 'base64url':
      return 'base64url'
    default:
      return undefined
  }
}
475+
476+
/**
 * Map an encoding name to its canonical `BufferEncoding` form.
 *
 * The overwhelmingly common inputs (`null`/`undefined`, `'utf8'`, `'utf-8'`)
 * are answered inline; every other value is handed to `slowCases`. Names that
 * `slowCases` does not recognize fall back to `'utf8'`.
 *
 * Based on Node.js internal/util.js normalizeEncoding implementation.
 * @see https://github.com/nodejs/node/blob/ae62b36d442b7bf987e85ae6e0df0f02cc1bb17f/lib/internal/util.js#L247-L310
 *
 * @param enc - Encoding name to normalize (may be null or undefined)
 * @returns The canonical encoding; `'utf8'` when `enc` is missing or unknown
 * @private
 */
/*@__NO_SIDE_EFFECTS__*/
function normalizeEncoding(
  enc: BufferEncoding | string | null | undefined,
): BufferEncoding {
  // Fast path: no encoding given means the default.
  if (enc === undefined || enc === null) {
    return 'utf8'
  }
  // Fast path: the two canonical lowercase spellings of UTF-8.
  if (enc === 'utf8' || enc === 'utf-8') {
    return 'utf8'
  }
  // Everything else goes through the case-insensitive alias lookup.
  const resolved = slowCases(enc)
  return resolved ?? 'utf8'
}
496+
359497
/**
360498
* Process directory entries and filter for directories.
361499
* Filters entries to include only directories, optionally excluding empty ones.
@@ -1443,7 +1581,9 @@ export async function safeReadFile(
14431581
: ({ __proto__: null, ...options } as SafeReadOptions)
14441582
const { defaultValue, ...rawReadOpts } = opts as SafeReadOptions
14451583
const readOpts = { __proto__: null, ...rawReadOpts } as ReadOptions
1446-
const { encoding = 'utf8' } = readOpts
1584+
let { encoding = 'utf8' } = readOpts
1585+
// Normalize encoding to canonical form.
1586+
encoding = encoding === null ? null : normalizeEncoding(encoding)
14471587
const shouldReturnBuffer = encoding === null
14481588
const fs = getFs()
14491589
try {
@@ -1510,7 +1650,9 @@ export function safeReadFileSync(
15101650
: ({ __proto__: null, ...options } as SafeReadOptions)
15111651
const { defaultValue, ...rawReadOpts } = opts as SafeReadOptions
15121652
const readOpts = { __proto__: null, ...rawReadOpts } as ReadOptions
1513-
const { encoding = 'utf8' } = readOpts
1653+
let { encoding = 'utf8' } = readOpts
1654+
// Normalize encoding to canonical form.
1655+
encoding = encoding === null ? null : normalizeEncoding(encoding)
15141656
const shouldReturnBuffer = encoding === null
15151657
const fs = getFs()
15161658
try {

0 commit comments

Comments
 (0)