Skip to content

Commit 9175cdf

Browse files
committed
Merge remote-tracking branch 'la-vache/main' into bamum-name-aliases
2 parents 15cdaee + 2160216 commit 9175cdf

File tree

175 files changed

+50038
-2262
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

175 files changed

+50038
-2262
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ perf-*.xml
4343
test-*.xml
4444

4545
# Directories
46+
.idea/
4647
.settings/
4748
.vs/
4849
.vscode/

UnicodeJsps/src/main/java/org/unicode/jsp/BidiCharMap.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ class BidiCharMap {
4848

4949
for (UnicodeSetIterator it =
5050
new UnicodeSetIterator(
51-
new UnicodeSet("[[:ascii:]-[[:cc:]-[:whitespace:]]]"));
51+
new UnicodeSet("[[:Block=ASCII:]-[[:cc:]-[:whitespace:]]]"));
5252
it.next(); ) {
5353
asciiHackMap.put(
5454
it.codepoint,

UnicodeJsps/src/main/java/org/unicode/jsp/Common.java

Lines changed: 2 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,7 @@
77
import com.ibm.icu.text.Normalizer2;
88
import com.ibm.icu.text.StringTransform;
99
import com.ibm.icu.text.Transform;
10-
import com.ibm.icu.text.UTF16;
1110
import com.ibm.icu.text.UnicodeSet;
12-
import com.ibm.icu.util.ULocale;
1311
import java.util.Arrays;
1412
import java.util.List;
1513
import org.unicode.jsp.XPropertyFactory.HanType.HanTypeValues;
@@ -47,18 +45,7 @@ public String transform(String source) {
4745
};
4846

4947
static List<String> XPROPERTY_NAMES =
50-
Arrays.asList(
51-
new String[] {
52-
"toNFC",
53-
"toNFD",
54-
"toNFKC",
55-
"toNFKD",
56-
"toCasefold",
57-
"toLowercase",
58-
"toUppercase",
59-
"toTitlecase",
60-
"subhead"
61-
});
48+
Arrays.asList(new String[] {"toNFC", "toNFD", "toNFKC", "toNFKD", "subhead"});
6249
static final int XSTRING_START = UProperty.STRING_LIMIT;
6350

6451
public static String getXStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
@@ -72,14 +59,6 @@ public static String getXStringPropertyValue(int propertyEnum, int codepoint, in
7259
return Common.MyNormalize(codepoint, Normalizer.NFKC);
7360
case Common.TO_NFKD:
7461
return Common.MyNormalize(codepoint, Normalizer.NFKD);
75-
case Common.TO_CASEFOLD:
76-
return UCharacter.foldCase(UTF16.valueOf(codepoint), true);
77-
case Common.TO_LOWERCASE:
78-
return UCharacter.toLowerCase(ULocale.ROOT, UTF16.valueOf(codepoint));
79-
case Common.TO_UPPERCASE:
80-
return UCharacter.toUpperCase(ULocale.ROOT, UTF16.valueOf(codepoint));
81-
case Common.TO_TITLECASE:
82-
return UCharacter.toTitleCase(ULocale.ROOT, UTF16.valueOf(codepoint), null);
8362
case Common.SUBHEAD:
8463
return UnicodeUtilities.getSubheader().getSubheader(codepoint);
8564
}
@@ -120,11 +99,7 @@ static String MyNormalize(String string, Mode mode) {
12099
static final int TO_NFD = UProperty.STRING_LIMIT + 1;
121100
static final int TO_NFKC = UProperty.STRING_LIMIT + 2;
122101
static final int TO_NFKD = UProperty.STRING_LIMIT + 3;
123-
static final int TO_CASEFOLD = UProperty.STRING_LIMIT + 4;
124-
static final int TO_LOWERCASE = UProperty.STRING_LIMIT + 5;
125-
static final int TO_UPPERCASE = UProperty.STRING_LIMIT + 6;
126-
static final int TO_TITLECASE = UProperty.STRING_LIMIT + 7;
127-
public static final int SUBHEAD = TO_TITLECASE + 1;
102+
public static final int SUBHEAD = TO_NFKD + 1;
128103
static final int XSTRING_LIMIT = SUBHEAD + 1;
129104
// static UnicodeSet isCaseFolded = new UnicodeSet();
130105
// static UnicodeSet isLowercase = new UnicodeSet();

UnicodeJsps/src/main/java/org/unicode/jsp/UcdLoader.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ public static synchronized VersionInfo getOldestLoadedUcd() {
2121
return oldestLoadedUcd;
2222
}
2323

24-
private static synchronized void setOldestLoadedUcd(VersionInfo v) {
24+
public static synchronized void setOldestLoadedUcd(VersionInfo v) {
2525
if (v.compareTo(oldestLoadedUcd) < 0) {
2626
oldestLoadedUcd = v;
2727
}

UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java

Lines changed: 7 additions & 234 deletions
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,11 @@
77
import com.ibm.icu.text.UTF16.StringComparator;
88
import com.ibm.icu.text.UnicodeSet;
99
import com.ibm.icu.util.ULocale;
10-
import com.ibm.icu.util.VersionInfo;
1110
import java.text.ParsePosition;
1211
import java.util.Comparator;
13-
import java.util.List;
1412
import java.util.regex.Pattern;
1513
import org.unicode.cldr.util.MultiComparator;
16-
import org.unicode.props.UnicodeProperty;
17-
import org.unicode.props.UnicodeProperty.PatternMatcher;
18-
import org.unicode.props.UnicodePropertySymbolTable;
14+
import org.unicode.text.UCD.VersionedSymbolTable;
1915

2016
public class UnicodeSetUtilities {
2117

@@ -110,7 +106,12 @@ public static UnicodeSet parseUnicodeSet(String input) {
110106
input = input.trim() + "]]]]]";
111107
String parseInput = "[" + input + "]]]]]";
112108
ParsePosition parsePosition = new ParsePosition(0);
113-
UnicodeSet result = new UnicodeSet(parseInput, parsePosition, fullSymbolTable);
109+
UnicodeSet result =
110+
new UnicodeSet(
111+
parseInput,
112+
parsePosition,
113+
VersionedSymbolTable.forReview(UcdLoader::getOldestLoadedUcd)
114+
.setUnversionedExtensions(XPropertyFactory.make()));
114115
int parseEnd = parsePosition.getIndex();
115116
if (parseEnd != parseInput.length()
116117
&& !UnicodeSetUtilities.OK_AT_END.containsAll(parseInput.substring(parseEnd))) {
@@ -125,232 +126,4 @@ public static UnicodeSet parseUnicodeSet(String input) {
125126
}
126127
return result;
127128
}
128-
129-
static UnicodeSet.XSymbolTable fullSymbolTable = new MySymbolTable();
130-
131-
private static class MySymbolTable extends UnicodeSet.XSymbolTable {
132-
UnicodeRegex unicodeRegex;
133-
XPropertyFactory factory;
134-
135-
public MySymbolTable() {
136-
unicodeRegex = new UnicodeRegex().setSymbolTable(this);
137-
}
138-
139-
// public boolean applyPropertyAlias0(String propertyName,
140-
// String propertyValue, UnicodeSet result) {
141-
// if (!propertyName.contains("*")) {
142-
// return applyPropertyAlias(propertyName, propertyValue, result);
143-
// }
144-
// String[] propertyNames = propertyName.split("[*]");
145-
// for (int i = propertyNames.length - 1; i >= 0; ++i) {
146-
// String pname = propertyNames[i];
147-
//
148-
// }
149-
// return null;
150-
// }
151-
152-
@Override
153-
public boolean applyPropertyAlias(
154-
String propertyName, String propertyValue, UnicodeSet result) {
155-
boolean status = false;
156-
boolean invert = false;
157-
int posNotEqual = propertyName.indexOf('\u2260');
158-
if (posNotEqual >= 0) {
159-
propertyValue =
160-
propertyValue.length() == 0
161-
? propertyName.substring(posNotEqual + 1)
162-
: propertyName.substring(posNotEqual + 1) + "=" + propertyValue;
163-
propertyName = propertyName.substring(0, posNotEqual);
164-
invert = true;
165-
}
166-
if (propertyName.endsWith("!")) {
167-
propertyName = propertyName.substring(0, propertyName.length() - 1);
168-
invert = !invert;
169-
}
170-
int posColon = propertyName.indexOf(':');
171-
String versionPrefix = "";
172-
String versionlessPropertyName = propertyName;
173-
if (posColon >= 0) {
174-
versionPrefix = propertyName.substring(0, posColon + 1);
175-
versionlessPropertyName = propertyName.substring(posColon + 1);
176-
}
177-
178-
if (factory == null) {
179-
factory = XPropertyFactory.make();
180-
}
181-
182-
var gcProp = factory.getProperty(versionPrefix + "gc");
183-
var scProp = factory.getProperty(versionPrefix + "sc");
184-
185-
UnicodeProperty prop = factory.getProperty(propertyName);
186-
if (propertyValue.length() != 0) {
187-
if (prop == null) {
188-
propertyValue = propertyValue.trim();
189-
} else if (prop.isTrimmable()) {
190-
propertyValue = propertyValue.trim();
191-
} else {
192-
int debug = 0;
193-
}
194-
status = applyPropertyAlias0(prop, propertyValue, result, invert);
195-
} else {
196-
try {
197-
status = applyPropertyAlias0(gcProp, versionlessPropertyName, result, invert);
198-
} catch (Exception e) {
199-
}
200-
;
201-
if (!status) {
202-
try {
203-
status =
204-
applyPropertyAlias0(
205-
scProp, versionlessPropertyName, result, invert);
206-
} catch (Exception e) {
207-
}
208-
if (!status) {
209-
if (prop.isType(UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) {
210-
try {
211-
status = applyPropertyAlias0(prop, "No", result, !invert);
212-
} catch (Exception e) {
213-
}
214-
}
215-
if (!status) {
216-
status = applyPropertyAlias0(prop, "", result, invert);
217-
}
218-
}
219-
}
220-
}
221-
return status;
222-
}
223-
224-
private static String[][] COARSE_GENERAL_CATEGORIES = {
225-
{"Other", "C", "Cc", "Cf", "Cn", "Co", "Cs"},
226-
{"Letter", "L", "Ll", "Lm", "Lo", "Lt", "Lu"},
227-
{"Cased_Letter", "LC", "Ll", "Lt", "Lu"},
228-
{"Mark", "M", "Mc", "Me", "Mn"},
229-
{"Number", "N", "Nd", "Nl", "No"},
230-
{"Punctuation", "P", "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"},
231-
{"Symbol", "S", "Sc", "Sk", "Sm", "So"},
232-
{"Separator", "Z", "Zl", "Zp", "Zs"},
233-
};
234-
235-
// TODO(eggrobin): I think this function only ever returns true; might as well make it void.
236-
private boolean applyPropertyAlias0(
237-
UnicodeProperty prop, String propertyValue, UnicodeSet result, boolean invert) {
238-
result.clear();
239-
String propertyName = prop.getName();
240-
String trimmedPropertyValue = propertyValue.trim();
241-
PatternMatcher patternMatcher = null;
242-
if (trimmedPropertyValue.length() > 1
243-
&& trimmedPropertyValue.startsWith("/")
244-
&& trimmedPropertyValue.endsWith("/")) {
245-
String fixedRegex =
246-
unicodeRegex.transform(
247-
trimmedPropertyValue.substring(
248-
1, trimmedPropertyValue.length() - 1));
249-
patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex);
250-
}
251-
UnicodeProperty otherProperty = null;
252-
boolean testCp = false;
253-
if (trimmedPropertyValue.length() > 1
254-
&& trimmedPropertyValue.startsWith("@")
255-
&& trimmedPropertyValue.endsWith("@")) {
256-
String otherPropName =
257-
trimmedPropertyValue.substring(1, trimmedPropertyValue.length() - 1).trim();
258-
if ("cp".equalsIgnoreCase(otherPropName)) {
259-
testCp = true;
260-
} else {
261-
otherProperty = factory.getProperty(otherPropName);
262-
}
263-
}
264-
boolean isAge = UnicodeProperty.equalNames("age", propertyName);
265-
if (prop != null) {
266-
UnicodeSet set;
267-
if (testCp) {
268-
set = new UnicodeSet();
269-
for (int i = 0; i <= 0x10FFFF; ++i) {
270-
if (invert != UnicodeProperty.equals(i, prop.getValue(i))) {
271-
set.add(i);
272-
}
273-
}
274-
} else if (otherProperty != null) {
275-
set = new UnicodeSet();
276-
for (int i = 0; i <= 0x10FFFF; ++i) {
277-
String v1 = prop.getValue(i);
278-
String v2 = otherProperty.getValue(i);
279-
if (invert != UnicodeProperty.equals(v1, v2)) {
280-
set.add(i);
281-
}
282-
}
283-
} else if (patternMatcher == null) {
284-
if (!isValid(prop, propertyValue)) {
285-
throw new IllegalArgumentException(
286-
"The value '"
287-
+ propertyValue
288-
+ "' is illegal. Values for "
289-
+ propertyName
290-
+ " must be in "
291-
+ prop.getAvailableValues()
292-
+ " or in "
293-
+ prop.getValueAliases());
294-
}
295-
if (isAge) {
296-
set =
297-
prop.getSet(
298-
new UnicodePropertySymbolTable.ComparisonMatcher<
299-
VersionInfo>(
300-
UnicodePropertySymbolTable.parseVersionInfoOrMax(
301-
propertyValue),
302-
UnicodePropertySymbolTable.Relation.geq,
303-
Comparator.nullsFirst(Comparator.naturalOrder()),
304-
UnicodePropertySymbolTable::parseVersionInfoOrMax));
305-
} else {
306-
if (prop.getName().equals("General_Category")) {
307-
for (String[] coarseValue : COARSE_GENERAL_CATEGORIES) {
308-
final String longName = coarseValue[0];
309-
final String shortName = coarseValue[1];
310-
if (UnicodeProperty.equalNames(propertyValue, longName)
311-
|| UnicodeProperty.equalNames(propertyValue, shortName)) {
312-
for (int i = 2; i < coarseValue.length; ++i) {
313-
prop.getSet(coarseValue[i], result);
314-
}
315-
return true;
316-
}
317-
}
318-
}
319-
set = prop.getSet(propertyValue);
320-
}
321-
} else if (isAge) {
322-
set = new UnicodeSet();
323-
List<String> values = prop.getAvailableValues();
324-
for (String value : values) {
325-
if (patternMatcher.test(value)) {
326-
for (String other : values) {
327-
if (other.compareTo(value) <= 0) {
328-
set.addAll(prop.getSet(other));
329-
}
330-
}
331-
}
332-
}
333-
} else {
334-
set = prop.getSet(patternMatcher);
335-
}
336-
if (invert) {
337-
if (isAge) {
338-
set.complement();
339-
} else {
340-
set = prop.getUnicodeMap().keySet().removeAll(set);
341-
}
342-
}
343-
result.addAll(set);
344-
return true;
345-
}
346-
throw new IllegalArgumentException("Illegal property: " + propertyName);
347-
}
348-
349-
private boolean isValid(UnicodeProperty prop, String propertyValue) {
350-
// if (prop.getName().equals("General_Category")) {
351-
// if (propertyValue)
352-
// }
353-
return prop.isValidValue(propertyValue);
354-
}
355-
}
356129
}

0 commit comments

Comments
 (0)