Skip to content

Commit 3c1ec5e

Browse files
committed
Somehow the code in UCD.java becomes a little bit cleaner
1 parent 55a450a commit 3c1ec5e

File tree

3 files changed

+44
-37
lines changed

3 files changed

+44
-37
lines changed

unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.EnumMap;
2929
import java.util.EnumSet;
3030
import java.util.HashMap;
31+
import java.util.HashSet;
3132
import java.util.LinkedHashSet;
3233
import java.util.List;
3334
import java.util.Locale;
@@ -839,11 +840,21 @@ public List<String> _getNameAliases(List result) {
839840
}
840841

841842
/**
 * Appends the available value names of this property to {@code result} and returns it.
 *
 * <p>Three paths, in order:
 * <ol>
 *   <li>if {@code stringToNamedEnum} is non-null, all of {@code enumValueNames} are added;
 *   <li>if {@code isMultivalued()}, each value from {@code _getUnicodeMap().getAvailableValues()}
 *       is split with {@code delimiterSplitter} and the distinct parts are added;
 *   <li>otherwise the call is delegated to {@code _getUnicodeMap().getAvailableValues(result)}.
 * </ol>
 *
 * @param result list that receives the values; it is mutated and returned
 * @return the same {@code result} list on the first two paths, or whatever the
 *     map's {@code getAvailableValues(result)} returns on the third
 */
@Override
842-
protected List<String> _getAvailableValues(List result) {
843+
protected List<String> _getAvailableValues(List<String> result) {
843844
if (stringToNamedEnum != null) {
844845
result.addAll(enumValueNames);
845846
return result;
846847
}
848+
// Multivalued properties store several values in one delimited string; report the
// individual parts rather than the combined strings.
if (isMultivalued()) {
849+
// A HashSet removes duplicate parts; its iteration order is unspecified, so the order
// in which parts are appended to result below is unspecified too.
HashSet<String> valueSet = new HashSet<>();
850+
for (var value : _getUnicodeMap().getAvailableValues()) {
851+
// NOTE(review): value is passed to split() unchecked — confirm the map never
// reports a null value for a multivalued property.
for (var part : delimiterSplitter.split(value)) {
852+
valueSet.add(part);
853+
}
854+
}
855+
result.addAll(valueSet);
856+
return result;
857+
}
847858
return _getUnicodeMap().getAvailableValues(result);
848859
}
849860

unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.util.ArrayList;
2121
import java.util.Arrays;
2222
import java.util.Collection;
23+
import java.util.Collections;
2324
import java.util.Comparator;
2425
import java.util.HashMap;
2526
import java.util.HashSet;
@@ -157,7 +158,7 @@ public static synchronized void ResetCacheProperties() {
157158
private boolean isMultivalued = false;
158159

159160
private String delimiter = ",";
160-
private Splitter delimiterSplitter = Splitter.on(delimiter);
161+
protected Splitter delimiterSplitter = Splitter.on(delimiter);
161162

162163
public UnicodeProperty setMultivalued(boolean value) {
163164
isMultivalued = value;
@@ -263,6 +264,12 @@ public String getVersion() {
263264
return _getVersion();
264265
}
265266

267+
/**
 * Returns the value(s) of this property for {@code codepoint} as an iterable.
 *
 * <p>When the {@code isMultivalued} field is set, the string returned by
 * {@code getValue(codepoint)} is split with {@code delimiterSplitter}; otherwise the result
 * is a singleton containing {@code getValue(codepoint)} unchanged.
 *
 * @param codepoint the code point to look up
 * @return the split parts for a multivalued property, else a one-element collection
 */
public Iterable<String> getValues(int codepoint) {
268+
// NOTE(review): in the multivalued branch the result of getValue(codepoint) is handed to
// split() unchecked — confirm getValue cannot return null here.
return isMultivalued
269+
? delimiterSplitter.split(getValue(codepoint))
270+
: Collections.singleton(getValue(codepoint));
271+
}
272+
266273
public String getValue(int codepoint) {
267274
if (DEBUG && CHECK_VALUE == codepoint && CHECK_NAME.equals(getName())) {
268275
String value = _getValue(codepoint);
@@ -290,8 +297,10 @@ public List<String> getValueAliases(String valueAlias, List<String> result) {
290297
if (result == null) result = new ArrayList<>(1);
291298
result = _getValueAliases(valueAlias, result);
292299
if (!result.contains(valueAlias)) { // FIX && type < NUMERIC
293-
if (type == MISC) {
300+
if (type == MISC || type == NUMERIC) {
294301
// Unihan has multivalued properties but does not use aliases.
302+
// The concept of aliases does not really apply to numeric properties,
303+
// but we should apply UAX44-LM1. We don’t, though.
295304
result.add(valueAlias);
296305
} else {
297306
result = _getValueAliases(valueAlias, result); // for debugging

unicodetools/src/main/java/org/unicode/text/UCD/UCD.java

Lines changed: 21 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -522,41 +522,28 @@ static class HanException {
522522
}
523523

524524
/**
 * Populates {@code hanExceptions} with one {@link HanException} per code point taken from
 * the {@code numeric} property, setting its {@code numericValue} and a {@code numericType}
 * of {@code NUMERIC}.
 *
 * <p>This span is a diff: lines whose gutter ends in {@code -} are the previous
 * implementation (loop over {@code numeric.getAvailableValues()}, skipping null and "NaN",
 * then over {@code numeric.getSet(value)}); lines whose gutter ends in {@code +} are the
 * replacement (a single loop over the complement of {@code numeric.getSet("NaN")}).
 *
 * @param numeric the property whose values are parsed with {@code Double.parseDouble}
 */
private void populateHanExceptions(UnicodeProperty numeric) {
525-
for (String value : numeric.getAvailableValues()) {
526-
if (value == null || value.equals("NaN")) {
527-
continue;
528-
}
529-
String propertyValue = Utility.replace(value, ",", "");
530-
final int hack = propertyValue.indexOf(' ');
531-
if (hack >= 0) {
532-
Utility.fixDot();
533-
if (SHOW_LOADING) {
534-
System.out.println("BAD NUMBER: " + value);
535-
}
536-
propertyValue = propertyValue.substring(0, hack);
537-
}
538-
539-
for (String s : numeric.getSet(value)) {
540-
final int code = s.codePointAt(0);
541-
// Unicode 15.1:
542-
// This code had these two exceptions, but now U+4EAC actually has value
543-
// 10000000000000000
544-
// and we want to see that in DerivedNumericValues.txt,
545-
// so we stop making these exceptions.
546-
if (compositeVersion < 0xf0100 && (code == 0x5793 || code == 0x4EAC)) {
547-
continue; // two exceptions!!
548-
}
549-
550-
HanException except = (HanException) hanExceptions.get(code);
551-
if (except != null) {
552-
throw new IllegalArgumentException(
553-
"Duplicate Numeric Value for U+" + Utility.hex(code));
554-
}
555-
except = new HanException();
556-
hanExceptions.put(code, except);
557-
except.numericValue = Double.parseDouble(propertyValue);
558-
except.numericType = NUMERIC;
525+
// Visit every code point that is NOT in the set of code points whose value is "NaN".
// NOTE(review): the removed loop also skipped null values; nothing here does — confirm
// no visited code point has a null value, since Double.parseDouble(null) throws
// NullPointerException.
for (final int code : numeric.getSet("NaN").complement().codePoints()) {
526+
// Unicode 15.1:
527+
// This code had these two exceptions, but now U+4EAC actually has value
528+
// 10000000000000000
529+
// and we want to see that in DerivedNumericValues.txt,
530+
// so we stop making these exceptions.
531+
// NOTE(egg): These two exceptions (we are in a function called exceptions, so these are
532+
// exceptions to the broader exception that is Han numeric values) were made irrelevant
533+
// sometime before Unicode 5.2. See L2/03-094 for background.
534+
if (compositeVersion < 0xf0100 && (code == 0x5793 || code == 0x4EAC)) {
535+
continue; // two exceptions!!
536+
}
537+
538+
HanException except = (HanException) hanExceptions.get(code);
539+
// NOTE(review): "&& false" makes this condition always false, so the duplicate check
// never throws and an existing entry for this code point is overwritten by the put()
// below. Either restore the check or delete the dead branch.
if (except != null && false) {
540+
throw new IllegalArgumentException(
541+
"Duplicate Numeric Value for U+" + Utility.hex(code));
559542
}
543+
except = new HanException();
544+
hanExceptions.put(code, except);
545+
// Only the first element returned by getValues(code) is parsed; any further elements
// are ignored.
// NOTE(review): the removed code stripped "," and truncated at the first space before
// parsing; this line parses the first value as-is — confirm the values are plain numbers.
except.numericValue = Double.parseDouble(numeric.getValues(code).iterator().next());
546+
except.numericType = NUMERIC;
560547
}
561548
}
562549

0 commit comments

Comments
 (0)