File tree Expand file tree Collapse file tree 2 files changed +13
-9
lines changed
src/main/java/org/unicode/utilities Expand file tree Collapse file tree 2 files changed +13
-9
lines changed Original file line number Diff line number Diff line change 11# LinkEmail.txt
2- # Date: 2025-12-20 , 21:02:29 GMT
2+ # Date: 2025-12-24 , 21:06:25 GMT
33# © 2025 Unicode®, Inc.
44# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55# For terms of use and license, see https://www.unicode.org/terms_of_use.html
2626#
27270021 # 1.1 (!) EXCLAMATION MARK
28280023..0027 # 1.1 [5] (#..') NUMBER SIGN..APOSTROPHE
29- 002A..0039 # 1.1 [16] (*..9) ASTERISK..DIGIT NINE
29+ 002A..002B # 1.1 [2] (*..+) ASTERISK..PLUS SIGN
30+ 002D..0039 # 1.1 [13] (-..9) HYPHEN-MINUS..DIGIT NINE
3031003D # 1.1 (=) EQUALS SIGN
3132003F # 1.1 (?) QUESTION MARK
32330041..005A # 1.1 [26] (A..Z) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
@@ -1292,4 +1293,4 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
12921293323B0..33479 # 17.0 [4298] (..) CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479
12931294E0100..E01EF # 4.0 [240] (U+E0100..U+E01EF) VARIATION SELECTOR-17..VARIATION SELECTOR-256
12941295
1295- # Total code points: 149241
1296+ # Total code points: 149240
Original file line number Diff line number Diff line change @@ -190,14 +190,17 @@ private LinkTermination(String uset) {
190190 }
191191 }
192192
193- // Note: the source standards are painful to read.
194- // https://en.wikipedia.org/wiki/Email_address#Local-part is much easier
193+ // https://datatracker.ietf.org/doc/html/rfc5322#section-3.2.3 (atext) has the full list of ASCII characters allowed in the local part.
194+ // See also https://en.wikipedia.org/wiki/Email_address#Local-part
195+ // We also add the dot (ASCII '.'); the special constraints on where a dot may appear are checked separately afterwards.
195196
196- static final UnicodeSet EMAIL_EXCLUDES =
197- new UnicodeSet ("[\\ u0020 ; \\ : \" ( ) \\ [ \\ ] @ \\ \\ < >]" ).freeze ();
197+ static final UnicodeSet EMAIL_ASCII_INCLUDES =
198+ new UnicodeSet ("[[a-zA-Z][0-9][_ \\ - ! ? ' \\ { \\ } * / \\ & # % ` \\ ^ + = | ~ \\ $]]" )
199+ .add ('.' )
200+ .freeze ();
198201 static final UnicodeSet validEmailLocalPart =
199- new UnicodeSet ("[\\ p{XID_Continue}\\ p{block=basic_latin}- \\ p{Cc }]" )
200- .removeAll ( EMAIL_EXCLUDES )
202+ new UnicodeSet ("[\\ p{XID_Continue}- \\ p{block=basic_latin}]" )
203+ .addAll ( EMAIL_ASCII_INCLUDES )
201204 .freeze ();
202205 public static final UnicodeProperty LinkEmail =
203206 new UnicodeSetProperty ()
You can’t perform that action at this time.
0 commit comments