@@ -64,6 +64,7 @@ U_NAMESPACE_BEGIN
6464
6565UOBJECT_DEFINE_RTTI_IMPLEMENTATION (CanonicalIterator)
6666
67+
6768/* *
6869 *@param source string to get results for
6970 */
@@ -73,10 +74,10 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode
7374 pieces_lengths(nullptr ),
7475 current(nullptr ),
7576 current_length(0 ),
76- nfd(* Normalizer2::getNFDInstance (status)),
77- nfcImpl(* Normalizer2Factory::getNFCImpl (status))
77+ nfd(Normalizer2::getNFDInstance(status)),
78+ nfcImpl(Normalizer2Factory::getNFCImpl(status))
7879{
79- if (U_SUCCESS (status) && nfcImpl. ensureCanonIterData (status)) {
80+ if (U_SUCCESS (status) && nfcImpl-> ensureCanonIterData (status)) {
8081 setSource (sourceStr, status);
8182 }
8283}
@@ -172,7 +173,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
172173 int32_t i = 0 ;
173174 UnicodeString *list = nullptr ;
174175
175- nfd. normalize (newSource, source, status);
176+ nfd-> normalize (newSource, source, status);
176177 if (U_FAILURE (status)) {
177178 return ;
178179 }
@@ -194,7 +195,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
194195 current[0 ] = 0 ;
195196 pieces[0 ] = new UnicodeString[1 ];
196197 pieces_lengths[0 ] = 1 ;
197- if (pieces[0 ] == 0 ) {
198+ if (pieces[0 ] == nullptr ) {
198199 status = U_MEMORY_ALLOCATION_ERROR;
199200 goto CleanPartialInitialization;
200201 }
@@ -203,7 +204,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
203204
204205
205206 list = new UnicodeString[source.length ()];
206- if (list == 0 ) {
207+ if (list == nullptr ) {
207208 status = U_MEMORY_ALLOCATION_ERROR;
208209 goto CleanPartialInitialization;
209210 }
@@ -219,7 +220,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
219220 // on the NFD form - see above).
220221 for (; i < source.length (); i += U16_LENGTH (cp)) {
221222 cp = source.char32At (i);
222- if (nfcImpl. isCanonSegmentStarter (cp)) {
223+ if (nfcImpl-> isCanonSegmentStarter (cp)) {
223224 source.extract (start, i-start, list[list_length++]); // add up to i
224225 start = i;
225226 }
@@ -252,9 +253,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
252253 return ;
253254// Common section to cleanup all local variables and reset object variables.
254255CleanPartialInitialization:
255- if (list != nullptr ) {
256- delete[] list;
257- }
256+ delete[] list;
258257 cleanPieces ();
259258}
260259
@@ -264,10 +263,19 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
264263 * @param source the string to find permutations for
265264 * @return the results in a set.
266265 */
267- void U_EXPORT2 CanonicalIterator::permute (UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
266+ void U_EXPORT2 CanonicalIterator::permute (UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth ) {
268267 if (U_FAILURE (status)) {
269268 return ;
270269 }
270+ // To avoid an infinite loop caused by permute, we limit the depth of recursive
271+ // calls to permute and return U_UNSUPPORTED_ERROR when the limit is exceeded.
272+ // We know that some unit tests need a depth of at least 4. Set to 8 just in case
273+ // of unforeseen use cases.
274+ constexpr int32_t kPermuteDepthLimit = 8 ;
275+ if (depth > kPermuteDepthLimit ) {
276+ status = U_UNSUPPORTED_ERROR;
277+ return ;
278+ }
271279 // if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
272280 int32_t i = 0 ;
273281
@@ -277,7 +285,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
277285 if (source.length () <= 2 && source.countChar32 () <= 1 ) {
278286 UnicodeString *toPut = new UnicodeString (source);
279287 /* test for nullptr */
280- if (toPut == 0 ) {
288+ if (toPut == nullptr ) {
281289 status = U_MEMORY_ALLOCATION_ERROR;
282290 return ;
283291 }
@@ -311,7 +319,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
311319
312320 // see what the permutations of the characters before and after this one are
313321 // Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
314- permute (subPermuteString.remove (i, U16_LENGTH (cp)), skipZeros, &subpermute, status);
322+ permute (subPermuteString.remove (i, U16_LENGTH (cp)), skipZeros, &subpermute, status, depth+ 1 );
315323 /* Test for buffer overflows */
316324 if (U_FAILURE (status)) {
317325 return ;
@@ -346,7 +354,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
346354 Hashtable permutations (status);
347355 Hashtable basic (status);
348356 if (U_FAILURE (status)) {
349- return 0 ;
357+ return nullptr ;
350358 }
351359 result.setValueDeleter (uprv_deleteUObject);
352360 permutations.setValueDeleter (uprv_deleteUObject);
@@ -381,7 +389,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
381389 // UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
382390 UnicodeString possible (*((UnicodeString *)(ne2->value .pointer )));
383391 UnicodeString attempt;
384- nfd. normalize (possible, attempt, status);
392+ nfd-> normalize (possible, attempt, status);
385393
386394 // TODO: check if operator == is semantically the same as attempt.equals(segment)
387395 if (attempt==segment) {
@@ -399,15 +407,15 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
399407
400408 /* Test for buffer overflows */
401409 if (U_FAILURE (status)) {
402- return 0 ;
410+ return nullptr ;
403411 }
404412 // convert into a String[] to clean up storage
405413 // String[] finalResult = new String[result.size()];
406414 UnicodeString *finalResult = nullptr ;
407415 int32_t resultCount;
408416 if ((resultCount = result.count ()) != 0 ) {
409417 finalResult = new UnicodeString[resultCount];
410- if (finalResult == 0 ) {
418+ if (finalResult == nullptr ) {
411419 status = U_MEMORY_ALLOCATION_ERROR;
412420 return nullptr ;
413421 }
@@ -448,7 +456,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
448456 for (int32_t i = 0 ; i < segLen; i += U16_LENGTH (cp)) {
449457 // see if any character is at the start of some decomposition
450458 U16_GET (segment, 0 , i, segLen, cp);
451- if (!nfcImpl. getCanonStartSet (cp, starts)) {
459+ if (!nfcImpl-> getCanonStartSet (cp, starts)) {
452460 continue ;
453461 }
454462 // if so, see which decompositions match
@@ -471,7 +479,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
471479 UnicodeString item = *((UnicodeString *)(ne->value .pointer ));
472480 UnicodeString *toAdd = new UnicodeString (prefix);
473481 /* test for nullptr */
474- if (toAdd == 0 ) {
482+ if (toAdd == nullptr ) {
475483 status = U_MEMORY_ALLOCATION_ERROR;
476484 return nullptr ;
477485 }
@@ -509,7 +517,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
509517 UnicodeString temp (comp);
510518 int32_t inputLen=temp.length ();
511519 UnicodeString decompString;
512- nfd. normalize (temp, decompString, status);
520+ nfd-> normalize (temp, decompString, status);
513521 if (U_FAILURE (status)) {
514522 return nullptr ;
515523 }
@@ -573,7 +581,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
573581 // brute force approach
574582 // check to make sure result is canonically equivalent
575583 UnicodeString trial;
576- nfd. normalize (temp, trial, status);
584+ nfd-> normalize (temp, trial, status);
577585 if (U_FAILURE (status) || trial.compare (segment+segmentPos, segLen - segmentPos) != 0 ) {
578586 return nullptr ;
579587 }
0 commit comments