Skip to content

Commit 84139ed

Browse files
committed
Fix comma
1 parent ac0201d commit 84139ed

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

unicodetools/data/linkification/dev/LinkEmail.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# LinkEmail.txt
2-
# Date: 2025-12-20, 21:02:29 GMT
2+
# Date: 2025-12-24, 21:06:25 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -26,7 +26,8 @@
2626
#
2727
0021 # 1.1 (!) EXCLAMATION MARK
2828
0023..0027 # 1.1 [5] (#..') NUMBER SIGN..APOSTROPHE
29-
002A..0039 # 1.1 [16] (*..9) ASTERISK..DIGIT NINE
29+
002A..002B # 1.1 [2] (*..+) ASTERISK..PLUS SIGN
30+
002D..0039 # 1.1 [13] (-..9) HYPHEN-MINUS..DIGIT NINE
3031
003D # 1.1 (=) EQUALS SIGN
3132
003F # 1.1 (?) QUESTION MARK
3233
0041..005A # 1.1 [26] (A..Z) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
@@ -1292,4 +1293,4 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
12921293
323B0..33479 # 17.0 [4298] (𲎰..𳑹) CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479
12931294
E0100..E01EF # 4.0 [240] (U+E0100..U+E01EF) VARIATION SELECTOR-17..VARIATION SELECTOR-256
12941295

1295-
# Total code points: 149241
1296+
# Total code points: 149240

unicodetools/src/main/java/org/unicode/utilities/LinkUtilities.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,14 +190,17 @@ private LinkTermination(String uset) {
190190
}
191191
}
192192

193-
// Note: the source standards are painful to read.
194-
// https://en.wikipedia.org/wiki/Email_address#Local-part is much easier
193+
// https://datatracker.ietf.org/doc/html/rfc5322#section-3.2.3 has the full list for the ASCII part
194+
// See also https://en.wikipedia.org/wiki/Email_address#Local-part
195+
// We also add dot (ASCII '.') here; the special constraints on dots are checked afterward.
195196

196-
static final UnicodeSet EMAIL_EXCLUDES =
197-
new UnicodeSet("[\\u0020 ; \\: \" ( ) \\[ \\] @ \\\\ < >]").freeze();
197+
static final UnicodeSet EMAIL_ASCII_INCLUDES =
198+
new UnicodeSet("[[a-zA-Z][0-9][_ \\- ! ? ' \\{ \\} * / \\& # % ` \\^ + = | ~ \\$]]")
199+
.add('.')
200+
.freeze();
198201
static final UnicodeSet validEmailLocalPart =
199-
new UnicodeSet("[\\p{XID_Continue}\\p{block=basic_latin}-\\p{Cc}]")
200-
.removeAll(EMAIL_EXCLUDES)
202+
new UnicodeSet("[\\p{XID_Continue}-\\p{block=basic_latin}]")
203+
.addAll(EMAIL_ASCII_INCLUDES)
201204
.freeze();
202205
public static final UnicodeProperty LinkEmail =
203206
new UnicodeSetProperty()

0 commit comments

Comments
 (0)