ICU-22511: look past a surrogate pair when testing for trailing ccc

Summary (free text ahead of the first file header; not part of any hunk):

* FCDUTF16CollationIterator::previousCodePoint, C++ and Java: when the
  code unit just before the current position is a trail (low) surrogate,
  hasTccc() on it is always false, so the trailing-ccc test is made on
  the code unit before it instead (the lead surrogate, or the previous
  character when the pair is broken). Nothing is tested when the trail
  surrogate is the first code unit of the text.
* New regression test TestColItrInfiniteLoop22511, C++ and Java: compares
  two strings at IDENTICAL strength under every available locale and
  expects them to compare equal. The Java test notes that locales vi and
  wo triggered an infinite loop for these strings.

NOTE(review): indentation was reconstructed by convention; confirm the
context lines against the target tree or apply with whitespace tolerance.

diff --git a/icu4c/source/i18n/utf16collationiterator.cpp b/icu4c/source/i18n/utf16collationiterator.cpp
index 64c454b5d368..8349e23bd1bc 100644
--- a/icu4c/source/i18n/utf16collationiterator.cpp
+++ b/icu4c/source/i18n/utf16collationiterator.cpp
@@ -295,9 +295,21 @@ FCDUTF16CollationIterator::previousCodePoint(UErrorCode &errorCode) {
                 return U_SENTINEL;
             }
             c = *--pos;
-            if(CollationFCD::hasLccc(c)) {
-                if(CollationFCD::maybeTibetanCompositeVowel(c) ||
-                        (pos != start && CollationFCD::hasTccc(*(pos - 1)))) {
+            if (CollationFCD::hasLccc(c)) {
+                UBool normalizePrevSegment = CollationFCD::maybeTibetanCompositeVowel(c);
+                if (!normalizePrevSegment && pos != start) {
+                    if (U16_IS_TRAIL(*(pos - 1))) {
+                        // CollationFCD::hasTccc() always returns false.
+                        // To test possible trailing ccc, we need to check high surrogate
+                        // (or previous character for broken surrogate pair).
+                        if (pos - 1 != start) {
+                            normalizePrevSegment = CollationFCD::hasTccc(*(pos - 2));
+                        }
+                    } else {
+                        normalizePrevSegment = CollationFCD::hasTccc(*(pos - 1));
+                    }
+                }
+                if (normalizePrevSegment) {
                     ++pos;
                     if(!previousSegment(errorCode)) {
                         return U_SENTINEL;
diff --git a/icu4c/source/test/intltest/collationtest.cpp b/icu4c/source/test/intltest/collationtest.cpp
index bdab15d312a1..d5d7e1e61b84 100644
--- a/icu4c/source/test/intltest/collationtest.cpp
+++ b/icu4c/source/test/intltest/collationtest.cpp
@@ -90,6 +90,7 @@ class CollationTest : public IntlTest {
     void TestCollatorPredicateTypes();
     void TestUCollatorPredicateTypes();
     void TestCollatorMap();
+    void TestColItrInfiniteLoop22511();
 
 private:
     void checkFCD(const char *name, CollationIterator &ci, CodePointIterator &cpi);
@@ -168,6 +169,7 @@ void CollationTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
     TESTCASE_AUTO(TestCollatorPredicateTypes);
     TESTCASE_AUTO(TestUCollatorPredicateTypes);
     TESTCASE_AUTO(TestCollatorMap);
+    TESTCASE_AUTO(TestColItrInfiniteLoop22511);
     TESTCASE_AUTO_END;
 }
 
@@ -1912,6 +1914,7 @@ void CollationTest::TestHang22414() {
         errorCode.reset();
     }
 }
+
 void CollationTest::TestBuilderContextsOverflow() {
     IcuTestErrorCode errorCode(*this, "TestBuilderContextsOverflow");
     // ICU-20715: Bad memory access in what looks like a bogus CharsTrie after
@@ -2123,4 +2126,30 @@ void CollationTest::TestCollatorMap() {
     assertEquals(R"(u16m["a"])", 2, um[u"a"]);
 }
 
+void CollationTest::TestColItrInfiniteLoop22511() {
+    IcuTestErrorCode errorCode(*this, "TestColItrInfiniteLoop22511");
+    char16_t str1[] = {
+        0x0100, 0x032a, 0x01e0, 0xd804, 0xdd00, 0x031c
+    };
+    char16_t str2[] = {
+        0x0041, 0x0304, 0x032a, 0x01e0, 0xd804, 0xdd00, 0x031c
+    };
+    int32_t num_locales = 0;
+    const icu::Locale* locales = icu::Locale::getAvailableLocales(num_locales);
+    for (int32_t i = 0; i < num_locales; i++) {
+        errorCode.reset();
+        icu::Locale l = locales[i];
+        LocalPointer<Collator> coll(Collator::createInstance(l, errorCode));
+        errorCode.assertSuccess();
+        coll->setStrength(icu::Collator::IDENTICAL);
+        UCollationResult result = coll->compare(
+            str1, sizeof(str1)/sizeof(char16_t),
+            str2, sizeof(str2)/sizeof(char16_t),
+            errorCode);
+        errorCode.assertSuccess();
+        assertEquals(UnicodeString("Locale ") + l.getName(), UCOL_EQUAL, result);
+    }
+}
+
+
 #endif // !UCONFIG_NO_COLLATION
diff --git a/icu4j/main/collate/src/main/java/com/ibm/icu/impl/coll/FCDUTF16CollationIterator.java b/icu4j/main/collate/src/main/java/com/ibm/icu/impl/coll/FCDUTF16CollationIterator.java
index 20b1187c9653..e30990fedbbe 100644
--- a/icu4j/main/collate/src/main/java/com/ibm/icu/impl/coll/FCDUTF16CollationIterator.java
+++ b/icu4j/main/collate/src/main/java/com/ibm/icu/impl/coll/FCDUTF16CollationIterator.java
@@ -137,8 +137,20 @@ public int previousCodePoint() {
                 }
                 c = seq.charAt(--pos);
                 if (CollationFCD.hasLccc(c)) {
-                    if (CollationFCD.maybeTibetanCompositeVowel(c)
-                            || (pos != start && CollationFCD.hasTccc(seq.charAt(pos - 1)))) {
+                    boolean normalizePrevSegment = CollationFCD.maybeTibetanCompositeVowel(c);
+                    if (!normalizePrevSegment && pos != start) {
+                        if (Character.isLowSurrogate(seq.charAt(pos - 1))) {
+                            // CollationFCD.hasTccc() always returns false.
+                            // To test possible trailing ccc, we need to check high surrogate
+                            // (or previous character for broken surrogate pair).
+                            if (pos - 1 != start) {
+                                normalizePrevSegment = CollationFCD.hasTccc(seq.charAt(pos - 2));
+                            }
+                        } else {
+                            normalizePrevSegment = CollationFCD.hasTccc(seq.charAt(pos - 1));
+                        }
+                    }
+                    if (normalizePrevSegment) {
                         ++pos;
                         previousSegment();
                         c = seq.charAt(--pos);
diff --git a/icu4j/main/collate/src/test/java/com/ibm/icu/dev/test/collator/CollationTest.java b/icu4j/main/collate/src/test/java/com/ibm/icu/dev/test/collator/CollationTest.java
index 93d418782b86..437ec2514b76 100644
--- a/icu4j/main/collate/src/test/java/com/ibm/icu/dev/test/collator/CollationTest.java
+++ b/icu4j/main/collate/src/test/java/com/ibm/icu/dev/test/collator/CollationTest.java
@@ -1960,4 +1960,20 @@ public void TestBuilderContextsOverflow() {
             errln("unexpected type of exception: " + e);
         }
     }
+
+    @Test
+    public void TestColItrInfiniteLoop22511() {
+        // ICU-22511 Locale vi and wo triggers infinite loop for getting
+        // collation key for these strings.
+        final String str1 = "\u0100\u032a\u01e0\ud804\udd00\u031c";
+        final String str2 = "\u0041\u0304\u032a\u01e0\ud804\udd00\u031c";
+
+        ULocale[] locales = ULocale.getAvailableLocales();
+        for (ULocale loc : locales) {
+            Collator coll = Collator.getInstance(loc);
+            coll.setStrength(Collator.IDENTICAL);
+            int cmp = coll.compare(str1, str2);
+            assertEquals("Locale " + loc.toString(), 0, cmp);
+        }
+    }
 }