Skip to content

Commit 74cbecb

Browse files
committed
Two bugs, each of which would pin LinkEmail to 17.0
1 parent 6b213eb commit 74cbecb

File tree

3 files changed

+51
-4
lines changed

unicodetools/data/linkification/dev/LinkEmail.txt

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# LinkEmail.txt
2-
# Date: 2025-12-24, 00:04:19 GMT
2+
# Date: 2025-12-24, 02:37:15 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -121,10 +121,12 @@
121121
0526..0527 # 6.0 [2] (Ԧ..ԧ) CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
122122
0528..052F # 7.0 [8] (Ԩ..ԯ) CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK..CYRILLIC SMALL LETTER EL WITH DESCENDER
123123
0531..0556 # 1.1 [38] (Ա..Ֆ) ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
124+
0558 # 18.0 (U+0558) MODIFIER LETTER ARMENIAN SMALL EH
124125
0559 # 1.1 (ՙ) ARMENIAN MODIFIER LETTER LEFT HALF RING
125126
0560 # 11.0 (ՠ) ARMENIAN SMALL LETTER TURNED AYB
126127
0561..0587 # 1.1 [39] (ա..և) ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
127128
0588 # 11.0 (ֈ) ARMENIAN SMALL LETTER YI WITH STROKE
129+
058B..058C # 18.0 [2] (U+058B..U+058C) MODIFIER LETTER ARMENIAN SMALL INI..MODIFIER LETTER ARMENIAN SMALL YI
128130
0591..05A1 # 2.0 [17] (֑..֡) HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER
129131
05A2 # 4.1 (֢) HEBREW ACCENT ATNAH HAFUKH
130132
05A3..05AF # 2.0 [13] (֣..֯) HEBREW ACCENT MUNAH..HEBREW MARK MASORA CIRCLE
@@ -136,6 +138,7 @@
136138
05C4 # 2.0 (ׄ) HEBREW MARK UPPER DOT
137139
05C5 # 4.1 (ׅ) HEBREW MARK LOWER DOT
138140
05C7 # 4.1 (ׇ) HEBREW POINT QAMATS QATAN
141+
05C8 # 18.0 (U+05C8) HEBREW POINT SHEVA NA MUDGASH
139142
05D0..05EA # 1.1 [27] (א..ת) HEBREW LETTER ALEF..HEBREW LETTER TAV
140143
05EF # 11.0 (ׯ) HEBREW YOD TRIANGLE
141144
05F0..05F2 # 1.1 [3] (װ..ײ) HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
@@ -220,6 +223,7 @@
220223
097E..097F # 5.0 [2] (ॾ..ॿ) DEVANAGARI LETTER DDDA..DEVANAGARI LETTER BBA
221224
0980 # 7.0 (ঀ) BENGALI ANJI
222225
0981..0983 # 1.1 [3] (ঁ..ঃ) BENGALI SIGN CANDRABINDU..BENGALI SIGN VISARGA
226+
0984 # 18.0 (U+0984) BENGALI SIGN COMBINING ANUSVARA ABOVE
223227
0985..098C # 1.1 [8] (অ..ঌ) BENGALI LETTER A..BENGALI LETTER VOCALIC L
224228
098F..0990 # 1.1 [2] (এ..ঐ) BENGALI LETTER E..BENGALI LETTER AI
225229
0993..09A8 # 1.1 [22] (ও..ন) BENGALI LETTER O..BENGALI LETTER NA
@@ -238,6 +242,7 @@
238242
09E6..09F1 # 1.1 [12] (০..ৱ) BENGALI DIGIT ZERO..BENGALI LETTER RA WITH LOWER DIAGONAL
239243
09FC # 10.0 (ৼ) BENGALI LETTER VEDIC ANUSVARA
240244
09FE # 11.0 (৾) BENGALI SANDHI MARK
245+
09FF # 18.0 (U+09FF) BENGALI LETTER SANSKRIT BA
241246
0A01 # 4.0 (ਁ) GURMUKHI SIGN ADAK BINDI
242247
0A02 # 1.1 (ਂ) GURMUKHI SIGN BINDI
243248
0A03 # 4.0 (ਃ) GURMUKHI SIGN VISARGA
@@ -287,6 +292,7 @@
287292
0B44 # 5.1 (ୄ) ORIYA VOWEL SIGN VOCALIC RR
288293
0B47..0B48 # 1.1 [2] (େ..ୈ) ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
289294
0B4B..0B4D # 1.1 [3] (ୋ..୍) ORIYA VOWEL SIGN O..ORIYA SIGN VIRAMA
295+
0B53..0B54 # 18.0 [2] (U+0B53..U+0B54) ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE
290296
0B55 # 13.0 (୕) ORIYA SIGN OVERLINE
291297
0B56..0B57 # 1.1 [2] (ୖ..ୗ) ORIYA AI LENGTH MARK..ORIYA AU LENGTH MARK
292298
0B5C..0B5D # 1.1 [2] (ଡ଼..ଢ଼) ORIYA LETTER RRA..ORIYA LETTER RHA
@@ -544,6 +550,7 @@
544550
1810..1819 # 3.0 [10] (᠐..᠙) MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
545551
1820..1877 # 3.0 [88] (ᠠ..ᡷ) MONGOLIAN LETTER A..MONGOLIAN LETTER MANCHU ZHA
546552
1878 # 11.0 (ᡸ) MONGOLIAN LETTER CHA WITH TWO DOTS
553+
1879 # 18.0 (U+1879) MONGOLIAN LETTER ALTERNATE UE
547554
1880..18A9 # 3.0 [42] (ᢀ..ᢩ) MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI DAGALGA
548555
18AA # 5.1 (ᢪ) MONGOLIAN LETTER MANCHU ALI GALI LHA
549556
18B0..18F5 # 5.2 [70] (ᢰ..ᣵ) CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
@@ -568,7 +575,9 @@
568575
1ABF..1AC0 # 13.0 [2] (ᪿ..ᫀ) COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
569576
1AC1..1ACE # 14.0 [14] (᫁..ᫎ) COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING LATIN SMALL LETTER INSULAR T
570577
1ACF..1ADD # 17.0 [15] (᫏..᫝) COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW
578+
1ADE..1ADF # 18.0 [2] (U+1ADE..U+1ADF) COMBINING GRAVE-DOT..COMBINING DOT-ACUTE
571579
1AE0..1AEB # 17.0 [12] (᫠..᫫) COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
580+
1AEC..1AF0 # 18.0 [5] (U+1AEC..U+1AF0) COMBINING CARON-ACUTE..COMBINING DOUBLE COMMA ABOVE
572581
1B00..1B4B # 5.0 [76] (ᬀ..ᭋ) BALINESE SIGN ULU RICEM..BALINESE LETTER ASYURA SASAK
573582
1B4C # 14.0 (ᭌ) BALINESE LETTER ARCHAIC JNYA
574583
1B50..1B59 # 5.0 [10] (᭐..᭙) BALINESE DIGIT ZERO..BALINESE DIGIT NINE
@@ -631,8 +640,10 @@
631640
2054 # 4.0 (⁔) INVERTED UNDERTIE
632641
2071 # 3.2 (ⁱ) SUPERSCRIPT LATIN SMALL LETTER I
633642
207F # 1.1 (ⁿ) SUPERSCRIPT LATIN SMALL LETTER N
643+
208F # 18.0 (U+208F) MODIFIER LETTER HIGH AND LOW VERTICAL LINE
634644
2090..2094 # 4.1 [5] (ₐ..ₔ) LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
635645
2095..209C # 6.0 [8] (ₕ..ₜ) LATIN SUBSCRIPT SMALL LETTER H..LATIN SUBSCRIPT SMALL LETTER T
646+
209D..209F # 18.0 [3] (U+209D..U+209F) LATIN SUBSCRIPT SMALL LETTER W..LATIN SUBSCRIPT SMALL LETTER Z
636647
20D0..20DC # 1.1 [13] (⃐..⃜) COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
637648
20E1 # 1.1 (⃡) COMBINING LEFT RIGHT ARROW ABOVE
638649
20E5..20EA # 3.2 [6] (⃥..⃪) COMBINING REVERSE SOLIDUS OVERLAY..COMBINING LEFTWARDS ARROW OVERLAY
@@ -764,6 +775,8 @@ A7D3 # 14.0 (ꟓ) LATIN SMALL LETTER DOUBLE THORN
764775
A7D4 # 17.0 (꟔) LATIN CAPITAL LETTER DOUBLE WYNN
765776
A7D5..A7D9 # 14.0 [5] (ꟕ..ꟙ) LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
766777
A7DA..A7DC # 16.0 [3] (Ꟛ..Ƛ) LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER LAMBDA WITH STROKE
778+
A7DD # 18.0 (U+A7DD) LATIN CAPITAL LETTER CLOSED OMEGA
779+
A7E2 # 18.0 (U+A7E2) LATIN CAPITAL LETTER R WITH LONG LEG
767780
A7F1 # 17.0 (꟱) MODIFIER LETTER CAPITAL S
768781
A7F2..A7F4 # 14.0 [3] (ꟲ..ꟴ) MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
769782
A7F5..A7F6 # 13.0 [2] (Ꟶ..ꟶ) LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
@@ -808,6 +821,7 @@ AB60..AB63 # 8.0 [4] (ꭠ..ꭣ) LATIN SMALL LETTER SAKHA YAT..LATIN SMALL
808821
AB64..AB65 # 7.0 [2] (ꭤ..ꭥ) LATIN SMALL LETTER INVERTED ALPHA..GREEK LETTER SMALL CAPITAL OMEGA
809822
AB66..AB67 # 12.0 [2] (ꭦ..ꭧ) LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
810823
AB68..AB69 # 13.0 [2] (ꭨ..ꭩ) LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE..MODIFIER LETTER SMALL TURNED W
824+
AB6C..AB6D # 18.0 [2] (U+AB6C..U+AB6D) LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING
811825
AB70..ABBF # 8.0 [80] (ꭰ..ꮿ) CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
812826
ABC0..ABEA # 5.2 [43] (ꯀ..ꯪ) MEETEI MAYEK LETTER KOK..MEETEI MAYEK VOWEL SIGN NUNG
813827
ABEC..ABED # 5.2 [2] (꯬..꯭) MEETEI MAYEK LUM IYEK..MEETEI MAYEK APUN IYEK
@@ -903,6 +917,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
903917
10780..10785 # 14.0 [6] (𐞀..𐞅) MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
904918
10787..107B0 # 14.0 [42] (𐞇..𐞰) MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
905919
107B2..107BA # 14.0 [9] (𐞲..𐞺) MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
920+
107BB..107BF # 18.0 [5] (U+107BB..U+107BF) MODIFIER LETTER SMALL TURNED T..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR
906921
10800..10805 # 4.0 [6] (𐠀..𐠅) CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
907922
10808 # 4.0 (𐠈) CYPRIOT SYLLABLE JO
908923
1080A..10835 # 4.0 [44] (𐠊..𐠵) CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
@@ -948,6 +963,8 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
948963
10EB0..10EB1 # 13.0 [2] (𐺰..𐺱) YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
949964
10EC2..10EC4 # 16.0 [3] (𐻂..𐻄) ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
950965
10EC5..10EC7 # 17.0 [3] (𐻅..𐻇) ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW
966+
10ED9..10EEE # 18.0 [22] (U+10ED9..U+10EEE) ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH
967+
10EF9 # 18.0 (U+10EF9) ARABIC MARK CROWN
951968
10EFA..10EFB # 17.0 [2] (𐻺..𐻻) ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON
952969
10EFC # 16.0 (𐻼) ARABIC COMBINING ALEF OVERLAY
953970
10EFD..10EFF # 15.0 [3] (𐻽..𐻿) ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
@@ -1061,6 +1078,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
10611078
11A9D # 11.0 (𑪝) SOYOMBO MARK PLUTA
10621079
11AB0..11ABF # 14.0 [16] (𑪰..𑪿) CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
10631080
11AC0..11AF8 # 7.0 [57] (𑫀..𑫸) PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
1081+
11B0A # 18.0 (U+11B0A) DEVANAGARI LETTER ALTERNATE DDDA
10641082
11B60..11B67 # 17.0 [8] (𑭠..𑭧) SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O
10651083
11BC0..11BE0 # 16.0 [33] (𑯀..𑯠) SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
10661084
11BF0..11BF9 # 16.0 [10] (𑯰..𑯹) SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
@@ -1098,7 +1116,10 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
10981116
12399 # 8.0 (𒎙) CUNEIFORM SIGN U U
10991117
12400..12462 # 5.0 [99] (𒐀..𒑢) CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
11001118
12463..1246E # 7.0 [12] (𒑣..𒑮) CUNEIFORM NUMERIC SIGN ONE QUARTER GUR..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
1119+
1246F # 18.0 (U+1246F) CUNEIFORM NUMERIC SIGN SEVEN ASH TENU
1120+
12475..1247F # 18.0 [11] (U+12475..U+1247F) CUNEIFORM NUMERIC SIGN EIGHT ASH TENU..CUNEIFORM NUMERIC SIGN ASH TIMES NINE DISH TENU
11011121
12480..12543 # 8.0 [196] (𒒀..𒕃) CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
1122+
12550..12686 # 18.0 [311] (U+12550..U+12686) CUNEIFORM NUMERIC SIGN ONE N01..CUNEIFORM NUMERIC SIGN ONE N36 FLAT
11021123
12F90..12FF0 # 14.0 [97] (𒾐..𒿰) CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
11031124
13000..1342E # 5.2 [1071] (𓀀..𓐮) EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
11041125
1342F # 15.0 (𓐯) EGYPTIAN HIEROGLYPH V011D
@@ -1120,6 +1141,8 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
11201141
16B7D..16B8F # 7.0 [19] (𖭽..𖮏) PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
11211142
16D40..16D6C # 16.0 [45] (𖵀..𖵬) KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN SAAT
11221143
16D70..16D79 # 16.0 [10] (𖵰..𖵹) KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE
1144+
16D80..16D9D # 18.0 [30] (U+16D80..U+16D9D) CHISOI LETTER A..CHISOI SIGN SISO
1145+
16DA0..16DA9 # 18.0 [10] (U+16DA0..U+16DA9) CHISOI DIGIT ZERO..CHISOI DIGIT NINE
11231146
16E40..16E7F # 11.0 [64] (𖹀..𖹿) MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
11241147
16EA0..16EB8 # 17.0 [25] (𖺠..𖺸) BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY
11251148
16EBB..16ED3 # 17.0 [25] (𖺻..𖻓) BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY
@@ -1141,20 +1164,26 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
11411164
187F8..187FF # 17.0 [8] (𘟸..𘟿) TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF
11421165
18800..18AF2 # 9.0 [755] (𘠀..𘫲) TANGUT COMPONENT-001..TANGUT COMPONENT-755
11431166
18AF3..18CD5 # 13.0 [483] (𘫳..𘳕) TANGUT COMPONENT-756..KHITAN SMALL SCRIPT CHARACTER-18CD5
1167+
18CD6..18CDA # 18.0 [5] (U+18CD6..U+18CDA) KHITAN SMALL SCRIPT CHARACTER-18CD6..KHITAN SMALL SCRIPT CHARACTER-18CDA
11441168
18CFF # 16.0 (𘳿) KHITAN SMALL SCRIPT CHARACTER-18CFF
11451169
18D00..18D08 # 13.0 [9] (𘴀..𘴈) TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
11461170
18D09..18D1E # 17.0 [22] (𘴉..𘴞) TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E
1171+
18D1F..18D20 # 18.0 [2] (U+18D1F..U+18D20) TANGUT IDEOGRAPH-18D1F..TANGUT IDEOGRAPH-18D20
11471172
18D80..18DF2 # 17.0 [115] (𘶀..𘷲) TANGUT COMPONENT-769..TANGUT COMPONENT-883
1173+
18E00..19191 # 18.0 [914] (U+18E00..U+19191) JURCHEN CHARACTER-18E00..JURCHEN CHARACTER-19191
1174+
191A0..191D2 # 18.0 [51] (U+191A0..U+191D2) JURCHEN RADICAL-01..JURCHEN RADICAL-51
11481175
1AFF0..1AFF3 # 14.0 [4] (𚿰..𚿳) KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
11491176
1AFF5..1AFFB # 14.0 [7] (𚿵..𚿻) KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
11501177
1AFFD..1AFFE # 14.0 [2] (𚿽..𚿾) KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
11511178
1B000..1B001 # 6.0 [2] (𛀀..𛀁) KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE
11521179
1B002..1B11E # 10.0 [285] (𛀂..𛄞) HENTAIGANA LETTER A-1..HENTAIGANA LETTER N-MU-MO-2
11531180
1B11F..1B122 # 14.0 [4] (𛄟..𛄢) HIRAGANA LETTER ARCHAIC WU..KATAKANA LETTER ARCHAIC WU
1181+
1B123..1B128 # 18.0 [6] (U+1B123..U+1B128) HIRAGANA DIGRAPH KOTO..KATAKANA LETTER ALTERNATE WI
11541182
1B132 # 15.0 (𛄲) HIRAGANA LETTER SMALL KO
11551183
1B150..1B152 # 12.0 [3] (𛅐..𛅒) HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
11561184
1B155 # 15.0 (𛅕) KATAKANA LETTER SMALL KO
11571185
1B164..1B167 # 12.0 [4] (𛅤..𛅧) KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
1186+
1B168 # 18.0 (U+1B168) KATAKANA LETTER SMALL ARCHAIC YE
11581187
1B170..1B2FB # 10.0 [396] (𛅰..𛋻) NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
11591188
1BC00..1BC6A # 7.0 [107] (𛰀..𛱪) DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
11601189
1BC70..1BC7C # 7.0 [13] (𛱰..𛱼) DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
@@ -1164,12 +1193,17 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
11641193
1CCF0..1CCF9 # 16.0 [10] (𜳰..𜳹) OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE
11651194
1CF00..1CF2D # 14.0 [46] (𜼀..𜼭) ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
11661195
1CF30..1CF46 # 14.0 [23] (𜼰..𜽆) ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
1196+
1D127..1D128 # 18.0 [2] (U+1D127..U+1D128) MUSICAL SYMBOL COMBINING STRESS..MUSICAL SYMBOL COMBINING UNSTRESS
11671197
1D165..1D169 # 3.1 [5] (𝅥..𝅩) MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING TREMOLO-3
11681198
1D16D..1D172 # 3.1 [6] (𝅭..𝅲) MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
11691199
1D17B..1D182 # 3.1 [8] (𝅻..𝆂) MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
11701200
1D185..1D18B # 3.1 [7] (𝆅..𝆋) MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
11711201
1D1AA..1D1AD # 3.1 [4] (𝆪..𝆭) MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
11721202
1D242..1D244 # 4.1 [3] (𝉂..𝉄) COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
1203+
1D250..1D252 # 18.0 [3] (U+1D250..U+1D252) MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8
1204+
1D25B..1D25C # 18.0 [2] (U+1D25B..U+1D25C) MUSICAL SYMBOL COMBINING TREMOLO-4..MUSICAL SYMBOL COMBINING TREMOLO-5
1205+
1D25F # 18.0 (U+1D25F) MUSICAL SYMBOL COMBINING BUZZ ROLL STEM
1206+
1D280..1D281 # 18.0 [2] (U+1D280..U+1D281) MUSICAL SYMBOL COMBINING STEM BOW BEHIND BRIDGE..MUSICAL SYMBOL COMBINING STEM BOW ON TOP OF BRIDGE
11731207
1D400..1D454 # 3.1 [85] (𝐀..𝑔) MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
11741208
1D456..1D49C # 3.1 [71] (𝑖..𝒜) MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
11751209
1D49E..1D49F # 3.1 [2] (𝒞..𝒟) MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
@@ -1212,7 +1246,10 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
12121246
1DA9B..1DA9F # 8.0 [5] (𝪛..𝪟) SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
12131247
1DAA1..1DAAF # 8.0 [15] (𝪡..𝪯) SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
12141248
1DF00..1DF1E # 14.0 [31] (𝼀..𝼞) LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER S WITH CURL
1249+
1DF1F..1DF24 # 18.0 [6] (U+1DF1F..U+1DF24) LATIN SMALL LETTER D-ETH DIGRAPH..LATIN SMALL LETTER T-THETA DIGRAPH
12151250
1DF25..1DF2A # 15.0 [6] (𝼥..𝼪) LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
1251+
1DF2B..1DF81 # 18.0 [87] (U+1DF2B..U+1DF81) LATIN SMALL LETTER DEZH DIGRAPH WITH CURL..LATIN CAPITAL LETTER E WITH BENT TOPBAR
1252+
1DFCD..1DFFF # 18.0 [51] (U+1DFCD..U+1DFFF) MODIFIER LETTER SMALL TURNED R WITH MID-HEIGHT LEFT HOOK..MODIFIER LETTER SMALL T WITH HOOK AND RETROFLEX HOOK
12161253
1E000..1E006 # 9.0 [7] (𞀀..𞀆) COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
12171254
1E008..1E018 # 9.0 [17] (𞀈..𞀘) COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
12181255
1E01B..1E021 # 9.0 [7] (𞀛..𞀡) COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
@@ -1282,6 +1319,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
12821319
2B739 # 15.0 (𫜹) CJK UNIFIED IDEOGRAPH-2B739
12831320
2B73A..2B73F # 17.0 [6] (𫜺..𫜿) CJK UNIFIED IDEOGRAPH-2B73A..CJK UNIFIED IDEOGRAPH-2B73F
12841321
2B740..2B81D # 6.0 [222] (𫝀..𫠝) CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
1322+
2B81E # 18.0 (U+2B81E) CJK UNIFIED IDEOGRAPH-2B81E
12851323
2B820..2CEA1 # 8.0 [5762] (𫠠..𬺡) CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
12861324
2CEA2..2CEAD # 17.0 [12] (𬺢..𬺭) CJK UNIFIED IDEOGRAPH-2CEA2..CJK UNIFIED IDEOGRAPH-2CEAD
12871325
2CEB0..2EBE0 # 10.0 [7473] (𬺰..𮯠) CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
@@ -1290,6 +1328,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
12901328
30000..3134A # 13.0 [4939] (𰀀..𱍊) CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
12911329
31350..323AF # 15.0 [4192] (𱍐..𲎯) CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
12921330
323B0..33479 # 17.0 [4298] (𲎰..𳑹) CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479
1331+
3D000..3FC3F # 18.0 [11328] (U+3D000..U+3FC3F) SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
12931332
E0100..E01EF # 4.0 [240] (U+E0100..U+E01EF) VARIATION SELECTOR-17..VARIATION SELECTOR-256
12941333

1295-
# Total code points: 149241
1334+
# Total code points: 162119

unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1804,6 +1804,10 @@ public String _getValue(String string) {
18041804
protected List<String> _getAvailableValues(List<String> result) {
18051805
return YESNO;
18061806
}
1807+
1808+
public boolean hasUniformUnassigned() {
1809+
return false;
1810+
}
18071811
}
18081812

18091813
// private static class StringTransformProperty extends SimpleProperty {

unicodetools/src/main/java/org/unicode/utilities/LinkUtilities.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.nio.charset.StandardCharsets;
2626
import java.nio.file.Files;
2727
import java.nio.file.Path;
28+
import java.text.ParsePosition;
2829
import java.util.Comparator;
2930
import java.util.EnumMap;
3031
import java.util.EnumSet;
@@ -197,8 +198,11 @@ private LinkTermination(String uset) {
197198

198199
static final UnicodeSet EMAIL_EXCLUDES =
199200
new UnicodeSet("[\\u0020 ; \\: \" ( ) \\[ \\] @ \\\\ < >]").freeze();
200-
static final UnicodeSet validEmailLocalPart =
201-
new UnicodeSet("[\\p{XID_Continue}\\p{block=basic_latin}-\\p{Cc}]")
201+
public static final UnicodeSet validEmailLocalPart =
202+
new UnicodeSet(
203+
"[\\p{XID_Continue}\\p{block=basic_latin}-\\p{Cc}]",
204+
new ParsePosition(0),
205+
VersionedSymbolTable.frozenAt(Settings.LATEST_VERSION_INFO))
202206
.removeAll(EMAIL_EXCLUDES)
203207
.freeze();
204208
public static final UnicodeProperty LinkEmail =

0 commit comments

Comments
 (0)