Skip to content

Commit cc298e9

Browse files
authored
Full implementation for kEH_Func and kEH_FVal (#1162)
1 parent 2652137 commit cc298e9

File tree

9 files changed: +75 additions, -52 deletions

unicodetools/data/ucdxml/dev/ucd.nounihan.grouped.xml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -36334,9 +36334,9 @@
3633436334
<char cp="12FF2" na="CYPRO-MINOAN SIGN CM302" gc="Po" Alpha="N" IDS="N" XIDS="N" IDC="N" XIDC="N" WB="XX" SB="XX"/>
3633536335
<reserved first-cp="12FF3" last-cp="12FFF" age="unassigned" na="" gc="Cn" lb="XX" sc="Zzzz" scx="Zzzz" Alpha="N" Gr_Base="N" IDS="N" XIDS="N" IDC="N" XIDC="N" WB="XX" SB="XX"/>
3633636336
</group>
36337-
<group age="5.2" JSN="" gc="Lo" ccc="0" dt="none" dm="#" nt="None" nv="NaN" bc="L" bpt="n" bpb="#" Bidi_M="N" bmg="" suc="#" slc="#" stc="#" uc="#" lc="#" tc="#" scf="#" cf="#" jt="U" jg="No_Joining_Group" ea="N" lb="AL" sc="Egyp" scx="Egyp" Dash="N" WSpace="N" QMark="N" Radical="N" Ideo="N" UIdeo="N" IDSB="N" IDST="N" hst="NA" DI="N" ODI="N" Alpha="Y" OAlpha="N" Upper="N" OUpper="N" Lower="N" OLower="N" Math="N" OMath="N" Hex="N" AHex="N" NChar="N" VS="N" Bidi_C="N" Join_C="N" Gr_Base="Y" Gr_Ext="N" OGr_Ext="N" STerm="N" Ext="N" Term="N" Dia="N" Dep="N" IDS="Y" OIDS="N" XIDS="Y" IDC="Y" OIDC="N" XIDC="Y" SD="N" LOE="N" Pat_WS="N" Pat_Syn="N" GCB="XX" WB="LE" SB="LE" CE="N" Comp_Ex="N" NFC_QC="Y" NFD_QC="Y" NFKC_QC="Y" NFKD_QC="Y" CI="N" Cased="N" CWCF="N" CWCM="N" CWKCF="N" CWL="N" CWT="N" CWU="N" NFKC_CF="#" InSC="Other" InPC="NA" PCM="N" vo="U" RI="N" blk="Egyptian_Hieroglyphs" na1="" Emoji="N" EPres="N" EMod="N" EBase="N" EComp="N" ExtPict="N" NFKC_SCF="#" ID_Compat_Math_Start="N" ID_Compat_Math_Continue="N" IDSU="N" InCB="None" MCM="N" kEH_Core="C" kEH_Func="Phonemogram" kEH_NoMirror="N" kEH_NoRotate="N" kEH_AltSeq="#">
36337+
<group age="5.2" JSN="" gc="Lo" ccc="0" dt="none" dm="#" nt="None" nv="NaN" bc="L" bpt="n" bpb="#" Bidi_M="N" bmg="" suc="#" slc="#" stc="#" uc="#" lc="#" tc="#" scf="#" cf="#" jt="U" jg="No_Joining_Group" ea="N" lb="AL" sc="Egyp" scx="Egyp" Dash="N" WSpace="N" QMark="N" Radical="N" Ideo="N" UIdeo="N" IDSB="N" IDST="N" hst="NA" DI="N" ODI="N" Alpha="Y" OAlpha="N" Upper="N" OUpper="N" Lower="N" OLower="N" Math="N" OMath="N" Hex="N" AHex="N" NChar="N" VS="N" Bidi_C="N" Join_C="N" Gr_Base="Y" Gr_Ext="N" OGr_Ext="N" STerm="N" Ext="N" Term="N" Dia="N" Dep="N" IDS="Y" OIDS="N" XIDS="Y" IDC="Y" OIDC="N" XIDC="Y" SD="N" LOE="N" Pat_WS="N" Pat_Syn="N" GCB="XX" WB="LE" SB="LE" CE="N" Comp_Ex="N" NFC_QC="Y" NFD_QC="Y" NFKC_QC="Y" NFKD_QC="Y" CI="N" Cased="N" CWCF="N" CWCM="N" CWKCF="N" CWL="N" CWT="N" CWU="N" NFKC_CF="#" InSC="Other" InPC="NA" PCM="N" vo="U" RI="N" blk="Egyptian_Hieroglyphs" na1="" Emoji="N" EPres="N" EMod="N" EBase="N" EComp="N" ExtPict="N" NFKC_SCF="#" ID_Compat_Math_Start="N" ID_Compat_Math_Continue="N" IDSU="N" InCB="None" MCM="N" kEH_Core="C" kEH_Func="Phonemogram" kEH_NoMirror="N" kEH_NoRotate="N">
3633836338
<char cp="13000" na="EGYPTIAN HIEROGLYPH A001" kEH_Cat="A-01-001" kEH_Desc="Man, seated, right knee raised, right arm raised, left arm in front of body." kEH_Func="Classifier human being" kEH_UniK="A001" kEH_JSesh="A1" kEH_HG="A1" kEH_IFAO="1,1"/>
36339-
<char cp="13001" na="EGYPTIAN HIEROGLYPH A002" kEH_Cat="A-01-027" kEH_Desc="Man, seated, right knee raised, right arm raised with hand to mouth, left arm hanging beside the body." kEH_Func="Classifier eating/drinking/speech/ silence/thoughts/feelings" kEH_UniK="A002" kEH_JSesh="A2" kEH_HG="A2" kEH_IFAO="2,4"/>
36339+
<char cp="13001" na="EGYPTIAN HIEROGLYPH A002" kEH_Cat="A-01-027" kEH_Desc="Man, seated, right knee raised, right arm raised with hand to mouth, left arm hanging beside the body." kEH_Func="Classifier eating/drinking/speech/silence/thoughts/feelings" kEH_UniK="A002" kEH_JSesh="A2" kEH_HG="A2" kEH_IFAO="2,4"/>
3634036340
<char cp="13002" na="EGYPTIAN HIEROGLYPH A003" kEH_Cat="A-01-005" kEH_Desc="Man, seated on heel, right knee raised, right arm raised, left arm in front of body." kEH_Func="Classifier sitting" kEH_FVal="ḥmsꞽ" kEH_UniK="A003" kEH_JSesh="A3" kEH_HG="A3"/>
3634136341
<char cp="13003" na="EGYPTIAN HIEROGLYPH A004" kEH_Cat="A-01-040" kEH_Desc="Man, seated, right knee raised, back straight, both arms raised in front, hand palms outwards." kEH_Func="Classifier supplication/hiding/rejection" kEH_UniK="A004" kEH_JSesh="A4" kEH_HG="A4" kEH_IFAO="2,14"/>
3634236342
<char cp="13004" na="EGYPTIAN HIEROGLYPH A005" kEH_Cat="A-11-002" kEH_Func="" kEH_UniK="A005" kEH_JSesh="A5" kEH_HG="A5"/>
@@ -37408,7 +37408,7 @@
3740837408
<char cp="1342E" na="EGYPTIAN HIEROGLYPH AA032" kEH_Cat="T-06-015" kEH_Desc="A bow, of an archaic type, with the ends towards the back." kEH_Func="Logogram (Nubia)" kEH_FVal="sty" kEH_UniK="AA032" kEH_JSesh="Aa32" kEH_HG="AA32"/>
3740937409
<char cp="1342F" age="15.0" na="EGYPTIAN HIEROGLYPH V011D" lb="OP" kEH_Cat="V-03-021" kEH_Func="" kEH_UniK="V011D" kEH_IFAO="434,10"/>
3741037410
</group>
37411-
<group age="15.0" JSN="" gc="Cf" ccc="0" dt="none" dm="#" nt="None" nv="NaN" bc="L" bpt="n" bpb="#" Bidi_M="N" bmg="" suc="#" slc="#" stc="#" uc="#" lc="#" tc="#" scf="#" cf="#" jt="T" jg="No_Joining_Group" ea="N" lb="CM" sc="Egyp" scx="Egyp" Dash="N" WSpace="N" QMark="N" Radical="N" Ideo="N" UIdeo="N" IDSB="N" IDST="N" hst="NA" DI="N" ODI="N" Alpha="N" OAlpha="N" Upper="N" OUpper="N" Lower="N" OLower="N" Math="N" OMath="N" Hex="N" AHex="N" NChar="N" VS="N" Bidi_C="N" Join_C="N" Gr_Base="N" Gr_Ext="N" OGr_Ext="N" STerm="N" Ext="N" Term="N" Dia="N" Dep="N" IDS="N" OIDS="N" XIDS="N" IDC="Y" OIDC="N" XIDC="Y" SD="N" LOE="N" Pat_WS="N" Pat_Syn="N" GCB="CN" WB="FO" SB="FO" CE="N" Comp_Ex="N" NFC_QC="Y" NFD_QC="Y" NFKC_QC="Y" NFKD_QC="Y" CI="Y" Cased="N" CWCF="N" CWCM="N" CWKCF="N" CWL="N" CWT="N" CWU="N" NFKC_CF="#" InSC="Other" InPC="NA" PCM="N" vo="U" RI="N" blk="Egyptian_Hieroglyph_Format_Controls" na1="" Emoji="N" EPres="N" EMod="N" EBase="N" EComp="N" ExtPict="N" NFKC_SCF="#" ID_Compat_Math_Start="N" ID_Compat_Math_Continue="N" IDSU="N" InCB="None" MCM="N" kEH_Core="N" kEH_NoMirror="N" kEH_NoRotate="N" kEH_AltSeq="#">
37411+
<group age="15.0" JSN="" gc="Cf" ccc="0" dt="none" dm="#" nt="None" nv="NaN" bc="L" bpt="n" bpb="#" Bidi_M="N" bmg="" suc="#" slc="#" stc="#" uc="#" lc="#" tc="#" scf="#" cf="#" jt="T" jg="No_Joining_Group" ea="N" lb="CM" sc="Egyp" scx="Egyp" Dash="N" WSpace="N" QMark="N" Radical="N" Ideo="N" UIdeo="N" IDSB="N" IDST="N" hst="NA" DI="N" ODI="N" Alpha="N" OAlpha="N" Upper="N" OUpper="N" Lower="N" OLower="N" Math="N" OMath="N" Hex="N" AHex="N" NChar="N" VS="N" Bidi_C="N" Join_C="N" Gr_Base="N" Gr_Ext="N" OGr_Ext="N" STerm="N" Ext="N" Term="N" Dia="N" Dep="N" IDS="N" OIDS="N" XIDS="N" IDC="Y" OIDC="N" XIDC="Y" SD="N" LOE="N" Pat_WS="N" Pat_Syn="N" GCB="CN" WB="FO" SB="FO" CE="N" Comp_Ex="N" NFC_QC="Y" NFD_QC="Y" NFKC_QC="Y" NFKD_QC="Y" CI="Y" Cased="N" CWCF="N" CWCM="N" CWKCF="N" CWL="N" CWT="N" CWU="N" NFKC_CF="#" InSC="Other" InPC="NA" PCM="N" vo="U" RI="N" blk="Egyptian_Hieroglyph_Format_Controls" na1="" Emoji="N" EPres="N" EMod="N" EBase="N" EComp="N" ExtPict="N" NFKC_SCF="#" ID_Compat_Math_Start="N" ID_Compat_Math_Continue="N" IDSU="N" InCB="None" MCM="N" kEH_Core="N" kEH_NoMirror="N" kEH_NoRotate="N">
3741237412
<char cp="13430" age="12.0" na="EGYPTIAN HIEROGLYPH VERTICAL JOINER" lb="GL" IDC="N" XIDC="N"/>
3741337413
<char cp="13431" age="12.0" na="EGYPTIAN HIEROGLYPH HORIZONTAL JOINER" lb="GL" IDC="N" XIDC="N"/>
3741437414
<char cp="13432" age="12.0" na="EGYPTIAN HIEROGLYPH INSERT AT TOP START" lb="GL" IDC="N" XIDC="N"/>
@@ -37449,7 +37449,7 @@
3744937449
<char cp="13455" na="EGYPTIAN HIEROGLYPH MODIFIER DAMAGED" gc="Mn" bc="NSM" Gr_Ext="Y" Dia="Y" GCB="EX" WB="Extend" SB="EX" InCB="Extend"/>
3745037450
<reserved first-cp="13456" last-cp="1345F" age="unassigned" na="" gc="Cn" jt="U" lb="XX" sc="Zzzz" scx="Zzzz" IDC="N" XIDC="N" GCB="XX" WB="XX" SB="XX" CI="N"/>
3745137451
</group>
37452-
<group age="16.0" na="EGYPTIAN HIEROGLYPH-#" JSN="" gc="Lo" ccc="0" dt="none" dm="#" nt="None" nv="NaN" bc="L" bpt="n" bpb="#" Bidi_M="N" bmg="" suc="#" slc="#" stc="#" uc="#" lc="#" tc="#" scf="#" cf="#" jt="U" jg="No_Joining_Group" ea="N" lb="AL" sc="Egyp" scx="Egyp" Dash="N" WSpace="N" QMark="N" Radical="N" Ideo="N" UIdeo="N" IDSB="N" IDST="N" hst="NA" DI="N" ODI="N" Alpha="Y" OAlpha="N" Upper="N" OUpper="N" Lower="N" OLower="N" Math="N" OMath="N" Hex="N" AHex="N" NChar="N" VS="N" Bidi_C="N" Join_C="N" Gr_Base="Y" Gr_Ext="N" OGr_Ext="N" STerm="N" Ext="N" Term="N" Dia="N" Dep="N" IDS="Y" OIDS="N" XIDS="Y" IDC="Y" OIDC="N" XIDC="Y" SD="N" LOE="N" Pat_WS="N" Pat_Syn="N" GCB="XX" WB="LE" SB="LE" CE="N" Comp_Ex="N" NFC_QC="Y" NFD_QC="Y" NFKC_QC="Y" NFKD_QC="Y" CI="N" Cased="N" CWCF="N" CWCM="N" CWKCF="N" CWL="N" CWT="N" CWU="N" NFKC_CF="#" InSC="Other" InPC="NA" PCM="N" vo="U" RI="N" blk="Egyptian_Hieroglyphs_Ext_A" na1="" Emoji="N" EPres="N" EMod="N" EBase="N" EComp="N" ExtPict="N" NFKC_SCF="#" ID_Compat_Math_Start="N" ID_Compat_Math_Continue="N" IDSU="N" InCB="None" MCM="N" kEH_Core="C" kEH_NoMirror="N" kEH_NoRotate="N" kEH_AltSeq="#">
37452+
<group age="16.0" na="EGYPTIAN HIEROGLYPH-#" JSN="" gc="Lo" ccc="0" dt="none" dm="#" nt="None" nv="NaN" bc="L" bpt="n" bpb="#" Bidi_M="N" bmg="" suc="#" slc="#" stc="#" uc="#" lc="#" tc="#" scf="#" cf="#" jt="U" jg="No_Joining_Group" ea="N" lb="AL" sc="Egyp" scx="Egyp" Dash="N" WSpace="N" QMark="N" Radical="N" Ideo="N" UIdeo="N" IDSB="N" IDST="N" hst="NA" DI="N" ODI="N" Alpha="Y" OAlpha="N" Upper="N" OUpper="N" Lower="N" OLower="N" Math="N" OMath="N" Hex="N" AHex="N" NChar="N" VS="N" Bidi_C="N" Join_C="N" Gr_Base="Y" Gr_Ext="N" OGr_Ext="N" STerm="N" Ext="N" Term="N" Dia="N" Dep="N" IDS="Y" OIDS="N" XIDS="Y" IDC="Y" OIDC="N" XIDC="Y" SD="N" LOE="N" Pat_WS="N" Pat_Syn="N" GCB="XX" WB="LE" SB="LE" CE="N" Comp_Ex="N" NFC_QC="Y" NFD_QC="Y" NFKC_QC="Y" NFKD_QC="Y" CI="N" Cased="N" CWCF="N" CWCM="N" CWKCF="N" CWL="N" CWT="N" CWU="N" NFKC_CF="#" InSC="Other" InPC="NA" PCM="N" vo="U" RI="N" blk="Egyptian_Hieroglyphs_Ext_A" na1="" Emoji="N" EPres="N" EMod="N" EBase="N" EComp="N" ExtPict="N" NFKC_SCF="#" ID_Compat_Math_Start="N" ID_Compat_Math_Continue="N" IDSU="N" InCB="None" MCM="N" kEH_Core="C" kEH_NoMirror="N" kEH_NoRotate="N">
3745337453
<char cp="13460" kEH_Cat="A-01-002" kEH_Core="N" kEH_Desc="Man, seated, right knee raised, right arm raised, left arm behind body." kEH_Func="Classifier human being" kEH_UniK="A001F"/>
3745437454
<char cp="13461" kEH_Cat="A-01-003" kEH_Desc="Man, seated, both knees raised, foot flat on the ground, right arm raised, left arm in front of the body." kEH_Func="Classifier human being" kEH_FVal="rmn.w" kEH_UniK="A001D" kEH_IFAO="1,3"/>
3745537455
<char cp="13462" kEH_Cat="A-01-004" kEH_Desc="Man, seated, right knee raised, right foot in front of the left knee, left foot horizontal, right arm raised in front, left arm in front of the body." kEH_Func="Logogram (first person singular)" kEH_FVal="ꞽ" kEH_UniK="A001H"/>
@@ -37465,7 +37465,7 @@
3746537465
<char cp="1346C" kEH_Cat="A-01-021" kEH_Desc="Man, seated, both knees down, right arm raised, hand held vertically, hand palm inwards, left arm held in front of the body, lower arm and hand horizontal, hand palm upwards." kEH_Func="Logogram (first person singular)" kEH_FVal="ꞽ" kEH_UniK="HJ A072A" kEH_JSesh="A72A" kEH_HG="A72A"/>
3746637466
<char cp="1346D" kEH_Cat="A-01-023" kEH_Desc="Man, seated on heel, both knees down, one arm visible, raised in front, hand at the height of the shoulder." kEH_Func="Classifier sitting" kEH_FVal="ḥ(m)sꞽ" kEH_UniK="A003D"/>
3746737467
<char cp="1346E" kEH_Cat="A-01-024" kEH_Core="N" kEH_UniK="HJ A003B" kEH_JSesh="A3B" kEH_HG="A3B" kEH_IFAO="1,14"/>
37468-
<char cp="1346F" kEH_Cat="A-01-028" kEH_Desc="Man, seated, both knees down, right arm raised with hand to mouth, left arm hanging beside the body." kEH_Func="Classifier eating/drinking/speech/ silence/thoughts feelings" kEH_UniK="A002F"/>
37468+
<char cp="1346F" kEH_Cat="A-01-028" kEH_Desc="Man, seated, both knees down, right arm raised with hand to mouth, left arm hanging beside the body." kEH_Func="Classifier eating/drinking/speech/silence/thoughts feelings" kEH_UniK="A002F"/>
3746937469
<char cp="13470" kEH_Cat="A-01-030" kEH_Desc="Man seated, right knee raised, right arm raised with hand to mouth, left arm raised in front of the body." kEH_Func="Classifier speech/sound" kEH_FVal="ṯḥḥ.wt" kEH_UniK="HJ A002A" kEH_JSesh="A2A" kEH_HG="A2A" kEH_IFAO="2,7"/>
3747037470
<char cp="13471" kEH_Cat="A-01-032" kEH_Desc="Man, seated, both knees raised, foot flat on the ground, right arm raised with hand to mouth, left arm not visible." kEH_Func="Classifier speech/silence" kEH_FVal="dbḥ" kEH_UniK="A002G"/>
3747137471
<char cp="13472" kEH_Cat="A-01-033" kEH_Desc="Man, seated, right knee raised, right arm raised, hand vertical, hand palm towards the face, in front of the face, left arm hanging beside the body." kEH_Func="Classifier acclaim, speaking" kEH_FVal="nhm" kEH_UniK="A002H"/>

unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,7 @@ enum SpecialProperty {
9292
public static final Pattern SPACE = Pattern.compile("\\s+");
9393
public static final Pattern EQUALS = Pattern.compile("\\s*=\\s*");
9494
public static final Pattern COMMA = Pattern.compile("\\s*,\\s*");
95+
public static final Pattern SLASH = Pattern.compile("\\s*/\\s*");
9596
public static final Pattern PIPE_SLASH = Pattern.compile("\\s*[|/]\\s*");
9697
public static final Pattern DECOMP_REMOVE = Pattern.compile("\\{[^}]+\\}|\\<[^>]+\\>");
9798

@@ -536,11 +537,17 @@ enum FileType {
536537
}
537538

538539
public void setMultiValued(String multivalued2) {
539-
if (property == UcdProperty.Name_Alias || property == UcdProperty.Standardized_Variant) {
540-
multivaluedSplit = NO_SPLIT;
541-
}
542-
if (property == UcdProperty.kEH_FVal) {
543-
multivaluedSplit = PIPE_SLASH;
540+
switch (property) {
541+
case Name_Alias:
542+
case Standardized_Variant:
543+
multivaluedSplit = NO_SPLIT;
544+
break;
545+
case kEH_FVal:
546+
multivaluedSplit = PIPE_SLASH;
547+
break;
548+
case kEH_Func:
549+
multivaluedSplit = SLASH;
550+
break;
544551
}
545552
if (multivalued2.endsWith("_COMMA")) {
546553
multivaluedSplit = COMMA;

unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -158,8 +158,9 @@ public String getAttributeValue(UcdProperty prop, int codepoint) {
158158
case String:
159159
switch (prop) {
160160
case Equivalent_Unified_Ideograph:
161-
String EqUIdeo = getMappingValue(codepoint, resolvedValue, false, "");
162-
return (EqUIdeo.equals("#")) ? null : EqUIdeo;
161+
case kEH_AltSeq:
162+
String ignoreHash = getMappingValue(codepoint, resolvedValue, false, "");
163+
return (ignoreHash.equals("#")) ? null : ignoreHash;
163164
case kCompatibilityVariant:
164165
String kCompatibilityVariant =
165166
getMappingValue(codepoint, resolvedValue, false, "U+");
@@ -213,6 +214,10 @@ public String getAttributeValue(UcdProperty prop, int codepoint) {
213214
return Optional.ofNullable(resolvedValue).orElse("");
214215
case kDefinition:
215216
return resolvedValue;
217+
case kEH_Func:
218+
if (resolvedValue != null) {
219+
return resolvedValue.replaceAll("[|]+", "/");
220+
}
216221
case kEH_FVal:
217222
if (resolvedValue != null) {
218223
return resolvedValue.replaceAll("[|]+", " | ");

0 commit comments

Comments
 (0)