@@ -273,6 +273,22 @@ export function compareUtf8Strings(left: string, right: string): number {
273273  // used to represent code points greater than 0xFFFF which have 4-byte UTF-8 representations 
274274  // and are lexicographically greater than the 1, 2, or 3-byte representations of code points 
275275  // less than or equal to 0xFFFF. 
276+   // 
277+   // An example of why Case 2 is required is comparing the following two Unicode code points: 
278+   // 
279+   // |-----------------------|------------|---------------------|-----------------| 
280+   // | Name                  | Code Point | UTF-8 Encoding      | UTF-16 Encoding | 
281+   // |-----------------------|------------|---------------------|-----------------| 
282+   // | Replacement Character | U+FFFD     | 0xEF 0xBF 0xBD      | 0xFFFD          | 
283+   // | Grinning Face         | U+1F600    | 0xF0 0x9F 0x98 0x80 | 0xD83D 0xDE00   | 
284+   // |-----------------------|------------|---------------------|-----------------| 
285+   // 
286+   // A lexicographical comparison of the UTF-8 encodings of these code points would order 
287+   // "Replacement Character" _before_ "Grinning Face" because 0xEF is less than 0xF0. However, a 
288+   // direct comparison of the UTF-16 code units, as would be done in Case 1, would erroneously 
289+   // produce the _opposite_ ordering, because 0xFFFD is _greater than_ 0xD83D. As it turns out, 
290+   // the UTF-8 ordering shown here holds in general: every code point that requires a UTF-16 
291+   // surrogate pair sorts after every code point that does not. 
276292  const  length  =  Math . min ( left . length ,  right . length ) ; 
277293  for  ( let  i  =  0 ;  i  <  length ;  i ++ )  { 
278294    const  leftChar  =  left . charAt ( i ) ; 
0 commit comments