Skip to content

Commit 6edf025

Browse files
committed
spotless
1 parent f43561a commit 6edf025

File tree

4 files changed

+153
-142
lines changed

4 files changed

+153
-142
lines changed
Lines changed: 78 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,95 @@
11
package org.unicode.tools;
22

3+
import com.google.common.base.Objects;
4+
import com.ibm.icu.impl.UnicodeMap;
5+
import com.ibm.icu.text.Normalizer2;
6+
import com.ibm.icu.text.UnicodeSet;
37
import java.io.IOException;
48
import java.nio.file.Files;
59
import java.nio.file.Path;
610
import java.nio.file.Paths;
711
import java.util.LinkedHashMap;
812
import java.util.Map;
913
import java.util.Map.Entry;
10-
1114
import org.unicode.cldr.util.Pair;
1215
import org.unicode.text.utility.Settings.UnicodeTools;
1316
import org.unicode.text.utility.Utility;
1417

15-
import com.google.common.base.Objects;
16-
import com.ibm.icu.impl.UnicodeMap;
17-
import com.ibm.icu.text.Normalizer2;
18-
import com.ibm.icu.text.UnicodeSet;
19-
2018
public class CheckMissingNFKC {
21-
public static void main(String[] args) throws IOException {
22-
Normalizer2 nfkc_cf = Normalizer2.getNFKCCasefoldInstance();
23-
24-
UnicodeMap<String> n4m = new UnicodeMap<>();
25-
System.out.println(UnicodeTools.UNICODETOOLS_RSRC_DIR);
26-
// "/unicodetools/src/main/resources/org/unicode/tools/nfkc-extended.txt"
27-
Path filePath = Paths.get(UnicodeTools.UNICODETOOLS_RSRC_DIR,"org/unicode/tools/nfkc-extended.txt");
28-
29-
// Unfortunately the internal tools in ICU aren't accessible, so parse it ourselves
30-
// https://unicode-org.github.io/icu/userguide/transforms/normalization/#data-file-syntax
31-
32-
Files.lines(filePath).forEach(line -> {
33-
if (line.isBlank() || line.startsWith("*")) {
34-
return;
35-
}
36-
int greaterPos = line.indexOf('>');
37-
if (greaterPos < 0) {
38-
greaterPos = line.indexOf('='); // for our purposed, = is the same as >
39-
if (greaterPos < 0) {
40-
int colonPos = line.indexOf(':');
41-
if (colonPos < 0) {
42-
throw new IllegalArgumentException("line: " + line);
43-
}
44-
}
45-
return;
46-
}
47-
String rawSource = line.substring(0,greaterPos);
48-
String target = Utility.fromHex(line.substring(greaterPos+1));
19+
public static void main(String[] args) throws IOException {
20+
Normalizer2 nfkc_cf = Normalizer2.getNFKCCasefoldInstance();
4921

50-
int rangePos = rawSource.indexOf("..");
51-
if (rangePos < 0) {
52-
String source = Utility.fromHex(rawSource);
53-
n4m.put(source, target);
54-
} else {
55-
int sourceStart = Utility.fromHex(rawSource.substring(0,rangePos)).codePointAt(0);
56-
int sourceEnd = Utility.fromHex(rawSource.substring(rangePos+2)).codePointAt(0);
57-
n4m.putAll(sourceStart, sourceEnd, target);
58-
}
59-
});
60-
61-
Map<String, Pair<String, String>> diff = new LinkedHashMap<>();
62-
UnicodeSet toCheck = new UnicodeSet("[[\\P{C}]-\\p{cf}]");
63-
System.out.println("Checking: " + toCheck.size() + " \t" + toCheck);
64-
for (int cp : toCheck.codePoints()) {
65-
String string = Character.toString(cp); // wish there were a code point interface
66-
String nfc_cfString = nfkc_cf.normalize(string);
67-
String n4mString = n4m.get(cp);
68-
if (n4mString == null) {
69-
n4mString = string;
70-
}
71-
if (Objects.equal(nfc_cfString, n4mString)) {
72-
continue;
73-
}
74-
diff.put(string, Pair.of(n4mString, nfc_cfString));
75-
}
76-
System.out.println("Differences:\t" + diff.size());
77-
System.out.println("Source" + "\t" + "N4M" + "\t" + "nfkc_cf");
22+
UnicodeMap<String> n4m = new UnicodeMap<>();
23+
System.out.println(UnicodeTools.UNICODETOOLS_RSRC_DIR);
24+
// "/unicodetools/src/main/resources/org/unicode/tools/nfkc-extended.txt"
25+
Path filePath =
26+
Paths.get(
27+
UnicodeTools.UNICODETOOLS_RSRC_DIR, "org/unicode/tools/nfkc-extended.txt");
7828

79-
for (Entry<String, Pair<String, String>> entry : diff.entrySet()) {
80-
System.out.println(Utility.hex(entry.getKey()) + "\t" + Utility.hex(entry.getValue().getFirst()) + "\t" + Utility.hex(entry.getValue().getSecond()));
81-
}
82-
}
29+
// Unfortunately the internal tools in ICU aren't accessible, so parse it ourselves
30+
// https://unicode-org.github.io/icu/userguide/transforms/normalization/#data-file-syntax
31+
32+
Files.lines(filePath)
33+
.forEach(
34+
line -> {
35+
if (line.isBlank() || line.startsWith("*")) {
36+
return;
37+
}
38+
int greaterPos = line.indexOf('>');
39+
if (greaterPos < 0) {
40+
greaterPos =
41+
line.indexOf('='); // for our purposed, = is the same as >
42+
if (greaterPos < 0) {
43+
int colonPos = line.indexOf(':');
44+
if (colonPos < 0) {
45+
throw new IllegalArgumentException("line: " + line);
46+
}
47+
}
48+
return;
49+
}
50+
String rawSource = line.substring(0, greaterPos);
51+
String target = Utility.fromHex(line.substring(greaterPos + 1));
52+
53+
int rangePos = rawSource.indexOf("..");
54+
if (rangePos < 0) {
55+
String source = Utility.fromHex(rawSource);
56+
n4m.put(source, target);
57+
} else {
58+
int sourceStart =
59+
Utility.fromHex(rawSource.substring(0, rangePos))
60+
.codePointAt(0);
61+
int sourceEnd =
62+
Utility.fromHex(rawSource.substring(rangePos + 2))
63+
.codePointAt(0);
64+
n4m.putAll(sourceStart, sourceEnd, target);
65+
}
66+
});
67+
68+
Map<String, Pair<String, String>> diff = new LinkedHashMap<>();
69+
UnicodeSet toCheck = new UnicodeSet("[[\\P{C}]-\\p{cf}]");
70+
System.out.println("Checking: " + toCheck.size() + " \t" + toCheck);
71+
for (int cp : toCheck.codePoints()) {
72+
String string = Character.toString(cp); // wish there were a code point interface
73+
String nfc_cfString = nfkc_cf.normalize(string);
74+
String n4mString = n4m.get(cp);
75+
if (n4mString == null) {
76+
n4mString = string;
77+
}
78+
if (Objects.equal(nfc_cfString, n4mString)) {
79+
continue;
80+
}
81+
diff.put(string, Pair.of(n4mString, nfc_cfString));
82+
}
83+
System.out.println("Differences:\t" + diff.size());
84+
System.out.println("Source" + "\t" + "N4M" + "\t" + "nfkc_cf");
85+
86+
for (Entry<String, Pair<String, String>> entry : diff.entrySet()) {
87+
System.out.println(
88+
Utility.hex(entry.getKey())
89+
+ "\t"
90+
+ Utility.hex(entry.getValue().getFirst())
91+
+ "\t"
92+
+ Utility.hex(entry.getValue().getSecond()));
93+
}
94+
}
8395
}

unicodetools/src/main/java/org/unicode/tools/FindBlankGlyphs.java

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,18 @@ public static void main(String[] args) {
2727
int fontSize = 144;
2828
Font font = new Font(fontName, Font.PLAIN, fontSize);
2929

30-
String specialCases = "[\\N{HANGUL CHOSEONG FILLER}"
31-
+ "\\N{HANGUL JUNGSEONG FILLER}"
32-
+ "\\N{HANGUL FILLER}"
33-
+ "\\N{HALFWIDTH HANGUL FILLER}"
34-
+ "\\N{COMBINING GRAPHEME JOINER}"
35-
+ "\\N{KHMER VOWEL INHERENT AQ}"
36-
+ "\\N{KHMER VOWEL INHERENT AA}"
37-
+ "\\N{BRAILLE PATTERN BLANK}"
38-
+ "\\p{variation_selector}]"
39-
+ "]";
40-
UnicodeSet exclusions =
30+
String specialCases =
31+
"[\\N{HANGUL CHOSEONG FILLER}"
32+
+ "\\N{HANGUL JUNGSEONG FILLER}"
33+
+ "\\N{HANGUL FILLER}"
34+
+ "\\N{HALFWIDTH HANGUL FILLER}"
35+
+ "\\N{COMBINING GRAPHEME JOINER}"
36+
+ "\\N{KHMER VOWEL INHERENT AQ}"
37+
+ "\\N{KHMER VOWEL INHERENT AA}"
38+
+ "\\N{BRAILLE PATTERN BLANK}"
39+
+ "\\p{variation_selector}]"
40+
+ "]";
41+
UnicodeSet exclusions =
4142
new UnicodeSet(
4243
"["
4344
+ "\\p{C}"
@@ -79,7 +80,8 @@ public static void main(String[] args) {
7980
+ "\t"
8081
+ UCharacter.getExtendedName(cp)
8182
+ "\t"
82-
+ GlyphRenderer.getPropValueName(UProperty.GENERAL_CATEGORY, NameChoice.SHORT, cp)
83+
+ GlyphRenderer.getPropValueName(
84+
UProperty.GENERAL_CATEGORY, NameChoice.SHORT, cp)
8385
+ "\t"
8486
+ visualBounds);
8587

@@ -130,7 +132,8 @@ public static void main(String[] args) {
130132
+ "</td><td>"
131133
+ UCharacter.getExtendedName(cp)
132134
+ "</td><td>"
133-
+ GlyphRenderer.getPropValueName(UProperty.GENERAL_CATEGORY, NameChoice.SHORT, cp)
135+
+ GlyphRenderer.getPropValueName(
136+
UProperty.GENERAL_CATEGORY, NameChoice.SHORT, cp)
134137
+ "</td>"
135138
+ "<tr>");
136139
}

0 commit comments

Comments
 (0)