@@ -61,16 +61,17 @@ export function createSinglebyteDecoder(encoding, loose = false) {
6161
6262const NON_LATIN = / [ ^ \x00 - \xFF ] / // eslint-disable-line no-control-regex
6363
64- function encode ( s , m ) {
64+ function encode ( s , m , loose ) {
6565 const len = s . length
6666 let i = 0
6767 const b = Buffer . from ( s , 'utf-16le' ) // aligned
6868 if ( ! isLE ) b . swap16 ( )
6969 const x = new Uint16Array ( b . buffer , b . byteOffset , b . byteLength / 2 )
70+ if ( ! m || m . length < 256 ) return null // perf
7071 for ( const len3 = len - 3 ; i < len3 ; i += 4 ) {
7172 const x0 = x [ i ] , x1 = x [ i + 1 ] , x2 = x [ i + 2 ] , x3 = x [ i + 3 ] // prettier-ignore
7273 const c0 = m [ x0 ] , c1 = m [ x1 ] , c2 = m [ x2 ] , c3 = m [ x3 ] // prettier-ignore
73- if ( ! ( c0 && c1 && c2 && c3 ) && ( ( ! c0 && x0 ) || ( ! c1 && x1 ) || ( ! c2 && x2 ) || ( ! c3 && x3 ) ) ) return null // prettier-ignore
74+ if ( ! ( c0 && c1 && c2 && c3 ) && ( ( ! c0 && x0 ) || ( ! c1 && x1 ) || ( ! c2 && x2 ) || ( ! c3 && x3 ) ) ) break
7475 x [ i ] = c0
7576 x [ i + 1 ] = c1
7677 x [ i + 2 ] = c2
@@ -80,35 +81,53 @@ function encode(s, m) {
8081 for ( ; i < len ; i ++ ) {
8182 const x0 = x [ i ]
8283 const c0 = m [ x0 ]
83- if ( ! c0 && x0 ) return null
84+ if ( ! c0 && x0 ) break
8485 x [ i ] = c0
8586 }
8687
87- return new Uint8Array ( x )
88+ if ( i === len ) return new Uint8Array ( x )
89+ if ( ! loose ) return null
90+ let j = i
91+ while ( i < len ) {
92+ const x0 = x [ i ++ ]
93+ let c0 = m [ x0 ]
94+ if ( ! c0 && x0 ) {
95+ c0 = 63 // '?'
96+ if ( x0 >= 0xd8_00 && x0 < 0xdc_00 && i < len ) {
97+ const x1 = x [ i ]
98+ if ( x1 >= 0xdc_00 && x1 < 0xe0_00 ) i ++
99+ }
100+ }
101+
102+ x [ j ++ ] = c0
103+ }
104+
105+ return new Uint8Array ( j === len ? x : x . subarray ( 0 , j ) )
88106}
89107
90108export function createSinglebyteEncoder ( encoding , { mode = 'fatal' } = { } ) {
91- // TODO: replacement, truncate (replacement will need varying length)
92- if ( mode !== 'fatal' ) throw new Error ( 'Unsupported mode' )
109+ const loose = mode === 'replacement'
110+ if ( mode !== 'fatal' && ! loose ) throw new Error ( 'Unsupported mode' )
93111 const m = encodeMap ( encoding ) // asserts
94112 const isLatin1 = encoding === 'iso-8859-1'
95113
96114 return ( s ) => {
97115 if ( typeof s !== 'string' ) throw new TypeError ( E_STRING )
98116 if ( isLatin1 ) {
99- if ( NON_LATIN . test ( s ) ) throw new TypeError ( E_STRICT )
100- const b = Buffer . from ( s , 'latin1' )
101- return new Uint8Array ( b . buffer , b . byteOffset , b . byteLength )
102- }
103-
104- // Instead of an ASCII regex check, encode optimistically - this is faster
105- // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
106- if ( ! NON_LATIN . test ( s ) ) {
117+ if ( ! NON_LATIN . test ( s ) ) {
118+ const b = Buffer . from ( s , 'latin1' )
119+ return new Uint8Array ( b . buffer , b . byteOffset , b . byteLength )
120+ }
121+
122+ if ( ! loose ) throw new TypeError ( E_STRICT )
123+ } else if ( ! NON_LATIN . test ( s ) ) {
124+ // Instead of an ASCII regex check, encode optimistically - this is faster
125+ // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
107126 const b = Buffer . from ( s , 'utf8' ) // ascii/latin1 coerces, we need to check
108127 if ( b . length === s . length ) return new Uint8Array ( b . buffer , b . byteOffset , b . byteLength )
109128 }
110129
111- const res = encode ( s , m )
130+ const res = encode ( s , m , loose )
112131 if ( ! res ) throw new TypeError ( E_STRICT )
113132 return res
114133 }
0 commit comments