Skip to content

Commit 40615fd

Browse files
authored
Fix charts & data for emoji 13.1 (#25)
* Aug 27 charts * Test * First cut at fixing emoji * Second pass at emoji fixes * Fixes to get the counts right * Drop the two provisional characters replaced by sequences; add the handshakes with tones. Note the special code that hacks the names. * Updated emoji charts (small changes for proposal, note for handshake); also adds an unrelated utility. * Small updates from review; make sure to use the latest images * Minor comment * Small change to restore kiss/couple (old code was filtering them out); also two unrelated changes to remove errors in UnicodeJsps. * Drop old provisional mending heart * Clean up proposal data, fix Emoji VERSION14
1 parent a87ae28 commit 40615fd

File tree

19 files changed

+860
-269
lines changed

19 files changed

+860
-269
lines changed

UnicodeJsps/build/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/classes/

UnicodeJspsTest/src/org/unicode/jsptest/TestUnicodeSet.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ public void TestPretty() {
136136
public void TestU60 () {
137137
logln("ICU Version: " + VersionInfo.ICU_VERSION.toString());
138138
logln("Unicode Data Version: " + UCharacter.getUnicodeVersion().toString());
139-
logln("Java Version: " + VersionInfo.javaVersion().toString());
139+
// logln("Java Version: " + VersionInfo.javaVersion().toString());
140140
logln("CLDR Data Version: " + LocaleData.getCLDRVersion().toString());
141141
logln("Time Zone Data Version: " + TimeZone.getTZDataVersion());
142142

unicodetools/data/emoji/13.1/emoji-sequences.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,15 @@
13881388
1F9DD 1F3FE ; RGI_Emoji_Modifier_Sequence ; elf: medium-dark skin tone # E5.0 [1] (🧝🏾)
13891389
1F9DD 1F3FF ; RGI_Emoji_Modifier_Sequence ; elf: dark skin tone # E5.0 [1] (🧝🏿)
13901390

1391-
# Total elements: 580
1391+
1F491 1F3FB ; RGI_Emoji_Modifier_Sequence
1392+
1F491 1F3FC ; RGI_Emoji_Modifier_Sequence
1393+
1F491 1F3FD ; RGI_Emoji_Modifier_Sequence
1394+
1F491 1F3FE ; RGI_Emoji_Modifier_Sequence
1395+
1F491 1F3FF ; RGI_Emoji_Modifier_Sequence
1396+
1F48F 1F3FB ; RGI_Emoji_Modifier_Sequence
1397+
1F48F 1F3FC ; RGI_Emoji_Modifier_Sequence
1398+
1F48F 1F3FD ; RGI_Emoji_Modifier_Sequence
1399+
1F48F 1F3FE ; RGI_Emoji_Modifier_Sequence
1400+
1F48F 1F3FF ; RGI_Emoji_Modifier_Sequence
13921401

13931402
#EOF

unicodetools/data/emoji/13.1/emoji-zwj-sequences.txt

Lines changed: 212 additions & 6 deletions
Large diffs are not rendered by default.

unicodetools/org/unicode/jsptest/TestUnicodeSet.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public void TestAEncodings() {
4646
public void TestU60 () {
4747
logln("ICU Version: " + VersionInfo.ICU_VERSION.toString());
4848
logln("Unicode Data Version: " + UCharacter.getUnicodeVersion().toString());
49-
logln("Java Version: " + VersionInfo.javaVersion().toString());
49+
// logln("Java Version: " + VersionInfo.javaVersion().toString());
5050
logln("CLDR Data Version: " + LocaleData.getCLDRVersion().toString());
5151
logln("Time Zone Data Version: " + TimeZone.getTZDataVersion());
5252

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package org.unicode.propstest;
2+
3+
import com.ibm.icu.text.UnicodeSet;
4+
5+
public class PrintUnicodeSet {
6+
public static void main(String[] args) {
7+
System.out.println(new UnicodeSet(
8+
"[‾‽‸⁂↚↛↮↙↜↝↞↟↠↡↢↣↤↥↦↧↨↫↬↭↯↰↱↲↳↴↵↶↷↸↹↺↻↼↽↾↿⇀⇁⇂⇃⇄⇇⇈⇉⇊⇋⇌⇐⇍"
9+
+ "⇑⇒⇏⇓⇔⇎⇖⇗⇘⇙⇚⇛⇜⇝⇞⇟⇠⇡⇢⇣⇤⇥⇦⇧⇨⇩⇪⇵∀∂∃∅∉∋∎∏∑≮≯∓∕⁄∗∘∙∝∟∠∣∥∧∫∬∮∴∵∶∷∼∽∾"
10+
+ "≃≅≌≒≖≣≦≧≪≫≬≳≺≻⊁⊃⊆⊇⊕⊖⊗⊘⊙⊚⊛⊞⊟⊥⊮⊰⊱⋭⊶⊹⊿⋁⋂⋃⋅⋆⋈⋒⋘⋙⋮⋯⋰⋱■□▢▣▤▥▦▧▨▩▬▭▮▰△▴▵▷▸▹►▻▽▾"
11+
+ "▿◁◂◃◄◅◆◇◈◉◌◍◎◐◑◒◓◔◕◖◗◘◙◜◝◞◟◠◡◢◣◤◥◦◳◷◻◽◿⨧⨯⨼⩣⩽⪍⪚⪺₢₣₤₰₳₶₷₨﷼]"
12+
).toPattern(false));
13+
}
14+
}

unicodetools/org/unicode/text/utility/Settings.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public class Settings {
2929
public static final String UNICODEJSPS_DIRECTORY = SVN_WORKSPACE_DIRECTORY + "UnicodeJsps/";
3030
public static final String UNICODE_DRAFT_DIRECTORY =
3131
CldrUtility.getProperty("UNICODE_DRAFT_DIR",
32-
SVN_WORKSPACE_DIRECTORY + "unicode-draft") + '/';
32+
SVN_WORKSPACE_DIRECTORY + "emoji/docs") + '/';
3333
public static final String UNICODE_DRAFT_PUBLIC = UNICODE_DRAFT_DIRECTORY + "Public/";
3434

3535
public static final String DATA_DIR = Utility.fixFileName(CldrUtility.getProperty("UCD_DIR", UNICODETOOLS_DIRECTORY + "data/")) + "/";
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package org.unicode.tools;
2+
3+
import java.util.Objects;
4+
5+
import com.ibm.icu.text.UnicodeSet;
6+
7+
public class CompareUnicodeSets {
8+
public static void main(String[] args) {
9+
UnicodeSet a = new UnicodeSet("[ا أ آ ب پ ت ٹ ث ج چ ح خ د ڈ ذ ر ڑ ز ژ س ش ص ض ط ظ ع غ ف ق ک گ ل م ن ں و ؤ ہ ۂ ھ ء ی ئ ے ة ه]");
10+
UnicodeSet b = new UnicodeSet("[ا ب پ ت ٹ ث ج چ ح خ د ڈ ذ ر ڑ ز ژ س ش ص ض ط ظ ع غ ف ق ک گ ل م ن و ہ ھ ء ی ے]");
11+
System.out.println("old:\t" + a.toPattern(false));
12+
System.out.println("new:\t" + b.toPattern(false));
13+
System.out.println("old-only:\t" + new UnicodeSet(a).removeAll(b).toPattern(false));
14+
System.out.println("new-only:\t" + new UnicodeSet(b).removeAll(a).toPattern(false));
15+
Objects.equals(3, 3);
16+
}
17+
}

unicodetools/org/unicode/tools/emoji/CandidateData.java

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@
5353
import com.ibm.icu.util.ULocale;
5454
import com.ibm.icu.util.VersionInfo;
5555

56+
/**
57+
* Provides data for candidates, reading the file candidateData.txt.
58+
* Note: At the end of a release, before the Draft Candidates are retired, run CandidateData.java to get the proposals for those
59+
* candidates, and add to the end of proposalData.txt
60+
*/
5661
public class CandidateData implements Transform<String, String>, EmojiDataSource {
5762
private static final String TEST_STRING = "👩‍🤝‍👩";
5863
private static final boolean SHOW_COMBOS = false;
@@ -128,6 +133,7 @@ public Set<Comparable> make() {
128133
private final UnicodeMap<Status> statuses = new UnicodeMap<>();
129134
private final UnicodeSet singleCharacters = new UnicodeSet();
130135
private final UnicodeSet allCharacters = new UnicodeSet();
136+
private final UnicodeSet fullDraftForProposals;
131137
private final UnicodeSet allNonProvisional = new UnicodeSet();
132138
private final UnicodeSet textPresentation = new UnicodeSet();
133139
private UnicodeSet provisional = new UnicodeSet();
@@ -305,6 +311,7 @@ private CandidateData(String sourceFile) {
305311
// allCharacters.removeAll(singleCharacters);
306312

307313
allCharacters.removeAll(duplicates).freeze();
314+
fullDraftForProposals = new UnicodeSet(statuses.getSet(Status.Draft_Candidate)).freeze();
308315
statuses.removeAll(duplicates).freeze();
309316
comments.freeze();
310317
categories.freeze();
@@ -424,7 +431,9 @@ private void addHackName(String name, String singleton, String prefix, String su
424431
compoundName += ", " + jName;
425432
}
426433
names.put(sequence, compoundName);
427-
System.out.println(Utility.hex(sequence) + " => " + compoundName);
434+
if (DEBUG) {
435+
System.out.println(Utility.hex(sequence) + " => " + compoundName);
436+
}
428437
}
429438
}
430439
}
@@ -797,7 +806,20 @@ public String getCategory(int source) {
797806
}
798807
public String getCategory(String source) {
799808
String result = EmojiOrder.STD_ORDER.charactersToOrdering.get(source);
800-
return result != null ? result : categories.get(source);
809+
if (result != null) {
810+
return result;
811+
}
812+
// final String stripped = EmojiData.removeEmojiVariants(EmojiData.MODIFIERS.stripFrom(source, true));
813+
// result = EmojiOrder.STD_ORDER.charactersToOrdering.get(stripped);
814+
// if (result != null) {
815+
// return result;
816+
// }
817+
result = categories.get(source);
818+
if (result != null) {
819+
return result;
820+
}
821+
System.out.println(Utility.hex(source) + "; " + source);
822+
return null;
801823
}
802824

803825
public List<Integer> getOrder() {
@@ -825,7 +847,7 @@ public static void main(String[] args) {
825847
DEBUG = true;
826848
CandidateData candidateData = CandidateData.getInstance();
827849
if (args.length == 0) {
828-
throw new IllegalArgumentException();
850+
args = new String[] {"proposals"};
829851
}
830852
int count = 0;
831853
for (String arg : args) {
@@ -896,10 +918,10 @@ private static void generateProposalData(CandidateData instance) {
896918
//1F931; L2/16-280,L2/16-282r; BREAST-FEEDING
897919
Set<String> done = new HashSet<>();
898920
UnicodeSet missing = new UnicodeSet();
899-
for (String item : instance.allCharacters) {
921+
for (String item : instance.fullDraftForProposals) {
900922
if (instance.statuses.get(item) == Status.Provisional_Candidate
901-
|| EmojiData.MODIFIERS.containsSome(item)
902-
|| Emoji.GENDER_MARKERS.containsSome(item)
923+
// || EmojiData.MODIFIERS.containsSome(item)
924+
// || Emoji.GENDER_MARKERS.containsSome(item)
903925
) {
904926
continue;
905927
}
@@ -909,9 +931,9 @@ private static void generateProposalData(CandidateData instance) {
909931
}
910932
done.add(skeleton);
911933
Set<String> proposals = instance.getProposal(item);
912-
if (proposals == null) {
913-
missing.add(item);
914-
}
934+
// if (proposals == null) {
935+
// missing.add(item);
936+
// }
915937
System.out.println(Utility.hex(skeleton)
916938
+ "; " + CollectionUtilities.join(proposals, ", ")
917939
+ "; " + instance.getName(item));

unicodetools/org/unicode/tools/emoji/Emoji.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ public class Emoji {
6262
/**
6363
* Constants for versions
6464
*/
65+
public static final VersionInfo VERSION14 = VersionInfo.getInstance(14,0);
6566
public static final VersionInfo VERSION13_1 = VersionInfo.getInstance(13,1);
6667
public static final VersionInfo VERSION13 = VersionInfo.getInstance(13);
6768
public static final VersionInfo VERSION12_1 = VersionInfo.getInstance(12,1);
@@ -91,9 +92,9 @@ public class Emoji {
9192
* Change each following once we release. That is, VERSION_LAST_RELEASED* becomes VERSION_BETA*, and both the latter increment.
9293
* Also add to EMOJI_TO_UNICODE_VERSION
9394
*/
94-
public static final VersionInfo VERSION_LAST_RELEASED2 = VERSION12_1;
95-
public static final VersionInfo VERSION_LAST_RELEASED = VERSION13;
96-
public static final VersionInfo VERSION_BETA = VERSION13_1;
95+
public static final VersionInfo VERSION_LAST_RELEASED2 = VERSION13;
96+
public static final VersionInfo VERSION_LAST_RELEASED = VERSION13_1;
97+
public static final VersionInfo VERSION_BETA = VERSION14;
9798

9899
public static final VersionInfo VERSION_TO_TEST = VERSION_BETA;
99100
public static final VersionInfo VERSION_TO_TEST_PREVIOUS = VERSION_LAST_RELEASED;
@@ -171,7 +172,7 @@ public class Emoji {
171172

172173
public static final VersionInfo VERSION_TO_GENERATE_UNICODE = IS_BETA ? VERSION_BETA_UNICODE : VERSION_LAST_RELEASED_UNICODE;
173174

174-
public static final String TR51_SVN_DIR = Settings.UNICODE_DRAFT_DIRECTORY + "reports/tr51/";
175+
//public static final String TR51_SVN_DIR = Settings.UNICODE_DRAFT_DIRECTORY + "reports/tr51/";
175176
//public static final String TR51_PREFIX = IS_BETA ? "internal-beta/" : "internal/";
176177

177178
public static final String EMOJI_DIR = Settings.UNICODE_DRAFT_DIRECTORY + "emoji/" + (Emoji.ABBR ? "🏴" : "");
@@ -185,7 +186,7 @@ public class Emoji {
185186
public static final String DATA_DIR_PRODUCTION = DATA_DIR_PRODUCTION_BASE + VERSION_STRING + "/";
186187

187188
public static final String IMAGES_SOURCE_DIR_SVG = Settings.UNICODETOOLS_DIRECTORY + "data/images/";
188-
static final String IMAGES_OUTPUT_DIR = TR51_SVN_DIR + "images/";
189+
public static final String IMAGES_OUTPUT_DIR = Settings.UNICODETOOLS_DIRECTORY + "../../images/emoji/";
189190

190191
public enum ModifierStatus {
191192
none, modifier, modifier_base;

0 commit comments

Comments
 (0)