@@ -147,26 +147,28 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
147147// Strips leading and trailing spaces and turns runs of spaces into single spaces.
148148// This should be replaced by UAX44-LM1 and UAX44-LM2 skeletonizations as part of ICU-3736.
149149template <typename CharT>
150- UBool mungeCharName (char * dst, std::basic_string_view<CharT> src, int32_t dstCapacity) {
150+ UBool mungeCharName (std::basic_string_view<CharT> src, char * dst , int32_t dstCapacity) {
151151 int32_t j = 0 ;
152152 --dstCapacity; /* make room for term. zero */
153153 if constexpr (!std::is_same_v<CharT, char >) {
154154 if (!uprv_isInvariantUString (src.data (), static_cast <int32_t >(src.size ()))) {
155155 return false ;
156156 }
157157 }
158- for (CharT ch : src) {
159- if (ch == ' ' && (j == 0 || (j > 0 && dst[j - 1 ] == ' ' ))) {
160- continue ;
161- }
162- if (j >= dstCapacity) return false ;
158+ for (CharT uch : src) {
159+ char ch;
163160 if constexpr (std::is_same_v<CharT, char >) {
164- dst[j++] = ch ;
161+ ch = uch ;
165162 } else {
166163 // This would want to be UCHAR_TO_CHAR but that is defined in uinvchar.cpp. This function
167164 // should not last long anyway (famous last words)…
168- u_UCharsToChars (&ch , &dst[j++] , 1 );
165+ u_UCharsToChars (&uch , &ch , 1 );
169166 }
167+ if (ch == ' ' && (j == 0 || (j > 0 && dst[j - 1 ] == ' ' ))) {
168+ continue ;
169+ }
170+ if (j >= dstCapacity) return false ;
171+ dst[j++] = ch;
170172 }
171173 if (j > 0 && dst[j-1 ] == ' ' ) --j;
172174 dst[j] = 0 ;
@@ -179,8 +181,8 @@ template<typename CharT>
179181UChar32 getCharacterByName (const std::basic_string_view<CharT> name) {
180182 // Must munge name, since u_charFromName() does not do 'loose' matching.
181183 char buf[128 ]; // it suffices that this be > uprv_getMaxCharNameLength
182- if (!mungeCharName<CharT>(buf, name , sizeof (buf))) {
183- return U_SENTINEL;
184+ if (!mungeCharName (name, buf , sizeof (buf))) {
185+ return U_SENTINEL;
184186 }
185187 for (const UCharNameChoice nameChoice : std::array{U_EXTENDED_CHAR_NAME, U_CHAR_NAME_ALIAS}) {
186188 UErrorCode ec = U_ZERO_ERROR;
@@ -702,10 +704,10 @@ class UnicodeSet::Lexer {
702704 }
703705 start = parsePosition_.getIndex ();
704706 } else if (last == u' }' ) {
705- const UChar32 result = getCharacterByName (
706- std::u16string_view (pattern_). substr ( start, parsePosition_.getIndex () - 1 - start));
707- if (result == U_SENTINEL || (hex.has_value () && result != hex) ||
708- (literal.has_value () && result != literal)) {
707+ const UChar32 result = getCharacterByName (std::u16string_view (pattern_). substr (
708+ start, parsePosition_.getIndex () - 1 - start));
709+ if (result < 0 || (hex.has_value () && result != hex) ||
710+ (literal.has_value () && result != literal)) {
709711 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
710712 return {};
711713 }
@@ -1475,8 +1477,9 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
14751477 }
14761478 case UCHAR_NAME:
14771479 {
1478- const UChar32 ch = getCharacterByName<char >(vname.data ());
1479- if (ch == U_SENTINEL) {
1480+ const UChar32 ch =
1481+ getCharacterByName<char >(std::string_view (vname.data (), vname.length ()));
1482+ if (ch < 0 ) {
14801483 FAIL (ec);
14811484 }
14821485 add (ch);
@@ -1490,7 +1493,9 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
14901493 // Must munge name, since u_versionFromString() does not do
14911494 // 'loose' matching.
14921495 char buf[128 ];
1493- if (!mungeCharName<char >(buf, vname.data (), sizeof (buf))) FAIL (ec);
1496+ if (!mungeCharName (std::string_view (vname.data (), vname.length ()), buf,
1497+ sizeof (buf)))
1498+ FAIL (ec);
14941499 UVersionInfo version;
14951500 u_versionFromString (version, buf);
14961501 applyFilter (versionFilter, &version,
0 commit comments