@@ -61,7 +61,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
6161
6262const NON_LATIN = / [ ^ \x00 - \xFF ] / // eslint-disable-line no-control-regex
6363
64- function encode ( s , m ) {
64+ function encode ( s , m , loose ) {
6565 const len = s . length
6666 let i = 0
6767 const b = Buffer . from ( s , 'utf-16le' ) // aligned
@@ -70,7 +70,7 @@ function encode(s, m) {
7070 for ( const len3 = len - 3 ; i < len3 ; i += 4 ) {
7171 const x0 = x [ i ] , x1 = x [ i + 1 ] , x2 = x [ i + 2 ] , x3 = x [ i + 3 ] // prettier-ignore
7272 const c0 = m [ x0 ] , c1 = m [ x1 ] , c2 = m [ x2 ] , c3 = m [ x3 ] // prettier-ignore
73- if ( ! ( c0 && c1 && c2 && c3 ) && ( ( ! c0 && x0 ) || ( ! c1 && x1 ) || ( ! c2 && x2 ) || ( ! c3 && x3 ) ) ) return null // prettier-ignore
73+ if ( ! ( c0 && c1 && c2 && c3 ) && ( ( ! c0 && x0 ) || ( ! c1 && x1 ) || ( ! c2 && x2 ) || ( ! c3 && x3 ) ) ) break
7474 x [ i ] = c0
7575 x [ i + 1 ] = c1
7676 x [ i + 2 ] = c2
@@ -80,35 +80,56 @@ function encode(s, m) {
8080 for ( ; i < len ; i ++ ) {
8181 const x0 = x [ i ]
8282 const c0 = m [ x0 ]
83- if ( ! c0 && x0 ) return null
83+ if ( ! c0 && x0 ) break
8484 x [ i ] = c0
8585 }
8686
87+ if ( i < len ) {
88+ if ( ! loose ) return null
89+ let j = i
90+ while ( i < len ) {
91+ const x0 = x [ i ++ ]
92+ let c0 = m [ x0 ]
93+ if ( ! c0 && x0 ) {
94+ c0 = 63 // '?'
95+ if ( x0 >= 0xd8_00 && x0 < 0xdc_00 && i < len ) {
96+ const x1 = x [ i ]
97+ if ( x1 >= 0xdc_00 && x1 < 0xe0_00 ) i ++
98+ }
99+ }
100+
101+ x [ j ++ ] = c0
102+ }
103+
104+ return new Uint8Array ( j === len ? x : x . subarray ( 0 , j ) )
105+ }
106+
87107 return new Uint8Array ( x )
88108}
89109
90110export function createSinglebyteEncoder ( encoding , { mode = 'fatal' } = { } ) {
91- // TODO: replacement, truncate (replacement will need varying length)
92- if ( mode !== 'fatal' ) throw new Error ( 'Unsupported mode' )
111+ const loose = mode === 'replacement'
112+ if ( mode !== 'fatal' && ! loose ) throw new Error ( 'Unsupported mode' )
93113 const m = encodeMap ( encoding ) // asserts
94114 const isLatin1 = encoding === 'iso-8859-1'
95115
96116 return ( s ) => {
97117 if ( typeof s !== 'string' ) throw new TypeError ( E_STRING )
98118 if ( isLatin1 ) {
99- if ( NON_LATIN . test ( s ) ) throw new TypeError ( E_STRICT )
100- const b = Buffer . from ( s , 'latin1' )
101- return new Uint8Array ( b . buffer , b . byteOffset , b . byteLength )
102- }
103-
104- // Instead of an ASCII regex check, encode optimistically - this is faster
105- // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
106- if ( ! NON_LATIN . test ( s ) ) {
119+ if ( ! NON_LATIN . test ( s ) ) {
120+ const b = Buffer . from ( s , 'latin1' )
121+ return new Uint8Array ( b . buffer , b . byteOffset , b . byteLength )
122+ }
123+
124+ if ( ! loose ) throw new TypeError ( E_STRICT )
125+ } else if ( ! NON_LATIN . test ( s ) ) {
126+ // Instead of an ASCII regex check, encode optimistically - this is faster
127+ // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
107128 const b = Buffer . from ( s , 'utf8' ) // ascii/latin1 coerces, we need to check
108129 if ( b . length === s . length ) return new Uint8Array ( b . buffer , b . byteOffset , b . byteLength )
109130 }
110131
111- const res = encode ( s , m )
132+ const res = encode ( s , m , loose )
112133 if ( ! res ) throw new TypeError ( E_STRICT )
113134 return res
114135 }
0 commit comments