@@ -61,54 +61,79 @@ export function createSinglebyteDecoder(encoding, loose = false) {
6161
// Matches any UTF-16 code unit above 0xFF, i.e. anything that does not fit in a single Latin-1 byte.
// Deliberately not global/sticky, so repeated `.test()` calls carry no `lastIndex` state.
const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
6363
/**
 * Encodes a string into single-byte values using a lookup table indexed by UTF-16 code unit.
 *
 * The string is first materialised as native-endian UTF-16 code units; each unit is then
 * replaced in place by its table entry and the result is narrowed to bytes.
 *
 * @param {string} s - Text to encode.
 * @param {ArrayLike<number>} m - Lookup table: `m[codeUnit]` is the output byte. A falsy entry
 *   for a non-zero code unit (or a code unit past the end of the table) means "unmappable".
 *   Tables that are missing or shorter than 256 entries are rejected.
 * @param {boolean} [loose=false] - When true, every unmappable character is emitted as `?` (0x3f)
 *   instead of failing; a high surrogate followed by a low surrogate yields a single `?`.
 * @returns {Uint8Array|null} The encoded bytes, or `null` when the table is unusable or when
 *   `loose` is false and the input contains an unmappable code unit.
 */
function encode(s, m, loose = false) {
  // Reject unusable tables up front, before allocating the UTF-16 buffer
  if (!m || m.length < 256) return null // perf

  const len = s.length
  const mlen = m.length
  const b = Buffer.from(s, 'utf-16le') // aligned
  if (!isLE) b.swap16() // make the bytes native-endian so the Uint16Array view reads real code units
  const x = new Uint16Array(b.buffer, b.byteOffset, b.byteLength / 2)

  // Fast path: translate four code units per iteration.
  // A falsy table entry is only an error when the source unit is non-zero (NUL legitimately maps to 0).
  // Units beyond the table read as `undefined`, which also lands in the "unmappable" branch.
  let i = 0
  const len3 = len - 3
  while (i < len3) {
    const x0 = x[i], x1 = x[i + 1], x2 = x[i + 2], x3 = x[i + 3] // prettier-ignore
    const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore
    if (!(c0 && c1 && c2 && c3) && ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3))) break // prettier-ignore
    x[i] = c0
    x[i + 1] = c1
    x[i + 2] = c2
    x[i + 3] = c3
    i += 4
  }

  // Scalar path: finishes the tail and pins down the exact index of the first unmappable unit
  for (; i < len; i++) {
    const x0 = x[i]
    if (x0 >= mlen) break
    const c0 = m[x0]
    if (!c0 && x0) break
    x[i] = c0
  }

  if (i === len) return new Uint8Array(x) // everything mapped
  if (!loose) return null

  // Replacement path: keep translating in place, compacting the output.
  // The write cursor `j` never overtakes the read cursor `i`, so unread units are not clobbered.
  let j = i
  while (i < len) {
    const x0 = x[i++]
    if (x0 >= 0xd8_00 && x0 < 0xdc_00) {
      // High surrogate: swallow a following low surrogate so the pair produces one '?'
      if (i < len) {
        const x1 = x[i]
        if (x1 >= 0xdc_00 && x1 < 0xe0_00) i++
      }

      x[j++] = 63 // '?'
    } else if (x0 >= mlen) {
      x[j++] = 63 // '?'
    } else {
      const c0 = m[x0]
      x[j++] = !c0 && x0 ? 63 : c0
    }
  }

  // Output is shorter than the input only when surrogate pairs were collapsed
  return new Uint8Array(j === len ? x : x.subarray(0, j))
}
89113
90114export function createSinglebyteEncoder ( encoding , { mode = 'fatal' } = { } ) {
91- // TODO: replacement, truncate (replacement will need varying length)
92- if ( mode !== 'fatal' ) throw new Error ( 'Unsupported mode' )
115+ const loose = mode === 'replacement'
116+ if ( mode !== 'fatal' && ! loose ) throw new Error ( 'Unsupported mode' )
93117 const m = encodeMap ( encoding ) // asserts
94118 const isLatin1 = encoding === 'iso-8859-1'
95119
96120 return ( s ) => {
97121 if ( typeof s !== 'string' ) throw new TypeError ( E_STRING )
98122 if ( isLatin1 ) {
99- if ( NON_LATIN . test ( s ) ) throw new TypeError ( E_STRICT )
100- const b = Buffer . from ( s , 'latin1' )
101- return new Uint8Array ( b . buffer , b . byteOffset , b . byteLength )
102- }
103-
104- // Instead of an ASCII regex check, encode optimistically - this is faster
105- // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
106- if ( ! NON_LATIN . test ( s ) ) {
123+ if ( ! NON_LATIN . test ( s ) ) {
124+ const b = Buffer . from ( s , 'latin1' )
125+ return new Uint8Array ( b . buffer , b . byteOffset , b . byteLength )
126+ }
127+
128+ if ( ! loose ) throw new TypeError ( E_STRICT )
129+ } else if ( ! NON_LATIN . test ( s ) ) {
130+ // Instead of an ASCII regex check, encode optimistically - this is faster
131+ // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
107132 const b = Buffer . from ( s , 'utf8' ) // ascii/latin1 coerces, we need to check
108133 if ( b . length === s . length ) return new Uint8Array ( b . buffer , b . byteOffset , b . byteLength )
109134 }
110135
111- const res = encode ( s , m )
136+ const res = encode ( s , m , loose )
112137 if ( ! res ) throw new TypeError ( E_STRICT )
113138 return res
114139 }
0 commit comments