Skip to content

Commit ff4a19b

Browse files
committed
After Markus’s review
1 parent f9f5294 commit ff4a19b

File tree

1 file changed

+22
-17
lines changed

1 file changed

+22
-17
lines changed

icu4c/source/common/uniset_props.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -147,26 +147,28 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
147147
// Strips leading and trailing spaces and turns runs of spaces into single spaces.
148148
// This should be replaced by UAX44-LM1 and UAX44-LM2 skeletonizations as part of ICU-3736.
149149
template<typename CharT>
150-
UBool mungeCharName(char* dst, std::basic_string_view<CharT> src, int32_t dstCapacity) {
150+
UBool mungeCharName(std::basic_string_view<CharT> src, char* dst, int32_t dstCapacity) {
151151
int32_t j = 0;
152152
--dstCapacity; /* make room for term. zero */
153153
if constexpr (!std::is_same_v<CharT, char>) {
154154
if (!uprv_isInvariantUString(src.data(), static_cast<int32_t>(src.size()))) {
155155
return false;
156156
}
157157
}
158-
for (CharT ch : src) {
159-
if (ch == ' ' && (j == 0 || (j > 0 && dst[j - 1] == ' '))) {
160-
continue;
161-
}
162-
if (j >= dstCapacity) return false;
158+
for (CharT uch : src) {
159+
char ch;
163160
if constexpr (std::is_same_v<CharT, char>) {
164-
dst[j++] = ch;
161+
ch = uch;
165162
} else {
166163
// This would want to be UCHAR_TO_CHAR but that is defined in uinvchar.cpp. This function
167164
// should not last long anyway (famous last words)…
168-
u_UCharsToChars(&ch, &dst[j++], 1);
165+
u_UCharsToChars(&uch, &ch, 1);
169166
}
167+
if (ch == ' ' && (j == 0 || (j > 0 && dst[j - 1] == ' '))) {
168+
continue;
169+
}
170+
if (j >= dstCapacity) return false;
171+
dst[j++] = ch;
170172
}
171173
if (j > 0 && dst[j-1] == ' ') --j;
172174
dst[j] = 0;
@@ -179,8 +181,8 @@ template<typename CharT>
179181
UChar32 getCharacterByName(const std::basic_string_view<CharT> name) {
180182
// Must munge name, since u_charFromName() does not do 'loose' matching.
181183
char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength
182-
if (!mungeCharName<CharT>(buf, name, sizeof(buf))) {
183-
return U_SENTINEL;
184+
if (!mungeCharName(name, buf, sizeof(buf))) {
185+
return U_SENTINEL;
184186
}
185187
for (const UCharNameChoice nameChoice : std::array{U_EXTENDED_CHAR_NAME, U_CHAR_NAME_ALIAS}) {
186188
UErrorCode ec = U_ZERO_ERROR;
@@ -702,10 +704,10 @@ class UnicodeSet::Lexer {
702704
}
703705
start = parsePosition_.getIndex();
704706
} else if (last == u'}') {
705-
const UChar32 result = getCharacterByName(
706-
std::u16string_view(pattern_).substr(start, parsePosition_.getIndex() - 1 - start));
707-
if (result == U_SENTINEL || (hex.has_value() && result != hex) ||
708-
(literal.has_value() && result != literal)) {
707+
const UChar32 result = getCharacterByName(std::u16string_view(pattern_).substr(
708+
start, parsePosition_.getIndex() - 1 - start));
709+
if (result < 0 || (hex.has_value() && result != hex) ||
710+
(literal.has_value() && result != literal)) {
709711
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
710712
return {};
711713
}
@@ -1475,8 +1477,9 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
14751477
}
14761478
case UCHAR_NAME:
14771479
{
1478-
const UChar32 ch = getCharacterByName<char>(vname.data());
1479-
if (ch == U_SENTINEL) {
1480+
const UChar32 ch =
1481+
getCharacterByName<char>(std::string_view(vname.data(), vname.length()));
1482+
if (ch < 0) {
14801483
FAIL(ec);
14811484
}
14821485
add(ch);
@@ -1490,7 +1493,9 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
14901493
// Must munge name, since u_versionFromString() does not do
14911494
// 'loose' matching.
14921495
char buf[128];
1493-
if (!mungeCharName<char>(buf, vname.data(), sizeof(buf))) FAIL(ec);
1496+
if (!mungeCharName(std::string_view(vname.data(), vname.length()), buf,
1497+
sizeof(buf)))
1498+
FAIL(ec);
14941499
UVersionInfo version;
14951500
u_versionFromString(version, buf);
14961501
applyFilter(versionFilter, &version,

0 commit comments

Comments
 (0)