From 6e08d8ef70e99580631254063e293b6871514f6e Mon Sep 17 00:00:00 2001 From: Denver Coneybeare Date: Tue, 8 Jul 2025 01:07:13 +0000 Subject: [PATCH 1/3] docs: add an example to the detailed comment in compareUtf8Strings() --- dev/src/order.ts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dev/src/order.ts b/dev/src/order.ts index 9588ef675..41ea58044 100644 --- a/dev/src/order.ts +++ b/dev/src/order.ts @@ -273,6 +273,22 @@ export function compareUtf8Strings(left: string, right: string): number { // used to represent code points greater than 0xFFFF which have 4-byte UTF-8 representations // and are lexicographically greater than the 1, 2, or 3-byte representations of code points // less than or equal to 0xFFFF. + // + // An example of why Case 2 is required is comparing the following two Unicode code points: + // + // |-----------------------|------------|---------------------|-----------------| + // | Name | Code Point | UTF-8 Encoding | UTF-16 Encoding | + // |-----------------------|------------|---------------------|-----------------| + // | Replacement Character | U+FFFD | 0xEF 0xBF 0xBD | 0xFFFD | + // | Grinning Face | U+1F600 | 0xF0 0x9F 0x98 0x80 | 0xD83D 0xDE00 | + // |-----------------------|------------|---------------------|-----------------| + // + // A lexicographical comparison of the UTF-8 encodings of these code points would order + // "Replacement Character" _before_ "Grinning Face" because 0xEF is less than 0xF0. However, a + // direct comparison of the UTF-16 code units, as would be done in Case 1, would erroneously + // produce the _opposite_ ordering, because 0xFFFD is _greater than_ 0xD83D. As it turns out, + // this UTF-8 ordering holds in general: every code point that requires a UTF-16 surrogate + // pair sorts after every code point that does not.
const length = Math.min(left.length, right.length); for (let i = 0; i < length; i++) { const leftChar = left.charAt(i); From 4bb47c66a90b323be68ca4f1b686aca0961431b2 Mon Sep 17 00:00:00 2001 From: Denver Coneybeare Date: Mon, 7 Jul 2025 21:10:35 -0400 Subject: [PATCH 2/3] npm run fix --- dev/src/order.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/src/order.ts b/dev/src/order.ts index 41ea58044..86c9925b7 100644 --- a/dev/src/order.ts +++ b/dev/src/order.ts @@ -288,7 +288,7 @@ export function compareUtf8Strings(left: string, right: string): number { // direct comparison of the UTF-16 code units, as would be done in Case 1, would erroneously // produce the _opposite_ ordering, because 0xFFFD is _greater than_ 0xD83D. As it turns out, // this UTF-8 ordering holds in general: every code point that requires a UTF-16 surrogate - // pair sorts after every code point that does not. + // pair sorts after every code point that does not. const length = Math.min(left.length, right.length); for (let i = 0; i < length; i++) { const leftChar = left.charAt(i); From 6c8afcc870ac82ca1547ff2a6d9c377135c48b1d Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 8 Jul 2025 01:13:19 +0000 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- dev/src/order.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/src/order.ts b/dev/src/order.ts index 41ea58044..86c9925b7 100644 --- a/dev/src/order.ts +++ b/dev/src/order.ts @@ -288,7 +288,7 @@ export function compareUtf8Strings(left: string, right: string): number { // direct comparison of the UTF-16 code units, as would be done in Case 1, would erroneously // produce the _opposite_ ordering, because 0xFFFD is _greater than_ 0xD83D.
As it turns out, // this UTF-8 ordering holds in general: every code point that requires a UTF-16 surrogate - // pair sorts after every code point that does not. + // pair sorts after every code point that does not. const length = Math.min(left.length, right.length); for (let i = 0; i < length; i++) { const leftChar = left.charAt(i);