Skip to content

Commit 202b65f

Browse files
committed
Combining_Mark
1 parent dfcd1f5 commit 202b65f

File tree

2 files changed

+26
-17
lines changed

2 files changed

+26
-17
lines changed

UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import com.ibm.icu.util.ULocale;
1010
import com.ibm.icu.util.VersionInfo;
1111
import java.text.ParsePosition;
12+
import java.util.Arrays;
1213
import java.util.Comparator;
1314
import java.util.List;
1415
import java.util.regex.Pattern;
@@ -221,15 +222,15 @@ public boolean applyPropertyAlias(
221222
return status;
222223
}
223224

224-
private static String[][] COARSE_GENERAL_CATEGORIES = {
225-
{"Other", "C", "Cc", "Cf", "Cn", "Co", "Cs"},
226-
{"Letter", "L", "Ll", "Lm", "Lo", "Lt", "Lu"},
227-
{"Cased_Letter", "LC", "Ll", "Lt", "Lu"},
228-
{"Mark", "M", "Mc", "Me", "Mn"},
229-
{"Number", "N", "Nd", "Nl", "No"},
230-
{"Punctuation", "P", "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"},
231-
{"Symbol", "S", "Sc", "Sk", "Sm", "So"},
232-
{"Separator", "Z", "Zl", "Zp", "Zs"},
225+
private static String[][][] COARSE_GENERAL_CATEGORIES = {
226+
{{"Other", "C"}, {"Cc", "Cf", "Cn", "Co", "Cs"}},
227+
{{"Letter", "L"}, {"Ll", "Lm", "Lo", "Lt", "Lu"}},
228+
{{"Cased_Letter", "LC"}, {"Ll", "Lt", "Lu"}},
229+
{{"Mark", "M", "Combining_Mark"}, {"Mc", "Me", "Mn"}},
230+
{{"Number", "N"}, {"Nd", "Nl", "No"}},
231+
{{"Punctuation", "P"}, {"Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"}},
232+
{{"Symbol", "S"}, {"Sc", "Sk", "Sm", "So"}},
233+
{{"Separator", "Z"}, {"Zl", "Zp", "Zs"}},
233234
};
234235

235236
// TODO(eggrobin): I think this function only ever returns true; might as well make it void.
@@ -304,13 +305,15 @@ private boolean applyPropertyAlias0(
304305
UnicodePropertySymbolTable::parseVersionInfoOrMax));
305306
} else {
306307
if (prop.getName().equals("General_Category")) {
307-
for (String[] coarseValue : COARSE_GENERAL_CATEGORIES) {
308-
final String longName = coarseValue[0];
309-
final String shortName = coarseValue[1];
310-
if (UnicodeProperty.equalNames(propertyValue, longName)
311-
|| UnicodeProperty.equalNames(propertyValue, shortName)) {
312-
for (int i = 2; i < coarseValue.length; ++i) {
313-
prop.getSet(coarseValue[i], result);
308+
for (String[][] coarseValue : COARSE_GENERAL_CATEGORIES) {
309+
final String[] aliases = coarseValue[0];
310+
if (Arrays.stream(aliases)
311+
.anyMatch(
312+
a ->
313+
UnicodeProperty.equalNames(
314+
propertyValue, a))) {
315+
for (var value : coarseValue[1]) {
316+
prop.getSet(value, result);
314317
}
315318
return true;
316319
}

UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,13 +145,19 @@ public void TestPretty() {
145145
}
146146

147147
@Test
148-
public void TestGeneralCategoryGroupings() {
148+
public void TestGeneralCategoryGroupingsWithIncrementalProperties() {
149149
IndexUnicodeProperties.useIncrementalProperties();
150150
UcdLoader.setOldestLoadedUcd(VersionInfo.UNICODE_10_0);
151151
checkSetsEqual("[\\p{U10:Lu}\\p{U10:Ll}\\p{U10:Lm}\\p{U10:Lt}\\p{U10:Lo}]", "\\p{U10:L}");
152152
UcdLoader.setOldestLoadedUcd(Settings.LAST_VERSION_INFO);
153153
}
154154

155+
@Test
156+
public void TestGeneralCategoryGroupings() {
157+
checkSetsEqual("[\\p{Lu}\\p{Ll}\\p{Lm}\\p{Lt}\\p{Lo}]", "\\p{L}");
158+
checkSetsEqual("[\\p{Mc}\\p{Me}\\p{Mn}]", "\\p{gc=Combining_Mark}");
159+
}
160+
155161
// public void TestAExemplars() {
156162
// checkProperties("[:exemplars_en:]", "[a]", "[\u0350]");
157163
// }

0 commit comments

Comments
 (0)