File tree Expand file tree Collapse file tree 2 files changed +30
-1
lines changed
Expand file tree Collapse file tree 2 files changed +30
-1
lines changed Original file line number Diff line number Diff line change @@ -119,4 +119,30 @@ describe("utf8", () => {
119119 const dec = decode ( enc ) ;
120120 expect ( dec ) . toEqual ( s ) ;
121121 } ) ;
122+
123+ it ( "should reject invalid UTF-16 strings with unpaired surrogates" , ( ) => {
124+ // High surrogate without low surrogate
125+ expect ( ( ) => encode ( '\ud800' ) ) . toThrowError ( / i n v a l i d s t r i n g / ) ;
126+ expect ( ( ) => encode ( '\udbff' ) ) . toThrowError ( / i n v a l i d s t r i n g / ) ;
127+
128+ // Low surrogate without high surrogate
129+ expect ( ( ) => encode ( '\udc00' ) ) . toThrowError ( / i n v a l i d s t r i n g / ) ;
130+ expect ( ( ) => encode ( '\udfff' ) ) . toThrowError ( / i n v a l i d s t r i n g / ) ;
131+
132+ // High surrogate at the end of string
133+ expect ( ( ) => encode ( 'hello\ud800' ) ) . toThrowError ( / i n v a l i d s t r i n g / ) ;
134+
135+ // Low surrogate at the beginning
136+ expect ( ( ) => encode ( '\udc00world' ) ) . toThrowError ( / i n v a l i d s t r i n g / ) ;
137+
138+ // Two high surrogates in a row (the first one is unpaired; the second forms a valid pair)
139+ expect ( ( ) => encode ( '\ud800\ud800\udc00' ) ) . toThrowError ( / i n v a l i d s t r i n g / ) ;
140+
141+ // Low surrogate followed by high surrogate (wrong order)
142+ expect ( ( ) => encode ( '\udc00\ud800' ) ) . toThrowError ( / i n v a l i d s t r i n g / ) ;
143+
144+ // Valid surrogate pair should work
145+ expect ( ( ) => encode ( '\ud800\udc00' ) ) . not . toThrow ( ) ; // U+10000
146+ expect ( ( ) => encode ( '\udbff\udfff' ) ) . not . toThrow ( ) ; // U+10FFFF
147+ } ) ;
122148} ) ;
Original file line number Diff line number Diff line change @@ -53,7 +53,7 @@ export function encodedLength(s: string): number {
5353 let c = s . charCodeAt ( i ) ;
5454
5555 if ( c >= 0xd800 && c <= 0xdbff ) {
56- // surrogate pair
56+ // High surrogate, must be followed by low surrogate.
5757 if ( i === s . length - 1 ) {
5858 throw new Error ( INVALID_UTF16 ) ;
5959 }
@@ -63,6 +63,9 @@ export function encodedLength(s: string): number {
6363 throw new Error ( INVALID_UTF16 ) ;
6464 }
6565 c = ( ( c - 0xd800 ) << 10 ) + ( c2 - 0xdc00 ) + 0x10000 ;
66+ } else if ( c >= 0xdc00 && c <= 0xdfff ) {
67+ // Low surrogate without preceding high surrogate.
68+ throw new Error ( INVALID_UTF16 ) ;
6669 }
6770
6871 if ( c < 0x80 ) {
You can’t perform that action at this time.
0 commit comments