Skip to content

Commit 62d373b

Browse files
committed
Minor cleanup
1 parent 1bd5c48 commit 62d373b

File tree

7 files changed

+22
-11
lines changed

7 files changed

+22
-11
lines changed

unicodetools/data/linkification/dev/LinkBracket.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# LinkBracket.txt
2-
# Date: 2025-12-11, 02:19:40 GMT
2+
# Date: 2025-12-12, 00:48:36 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html

unicodetools/data/linkification/dev/LinkDetectionTest.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# LinkDetectionTest.txt
2-
# Date: 2025-12-11, 02:19:40 GMT
2+
# Date: 2025-12-12, 00:48:36 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html

unicodetools/data/linkification/dev/LinkEmail.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# LinkEmail.txt
2-
# Date: 2025-12-11, 02:19:40 GMT
2+
# Date: 2025-12-12, 00:48:36 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html

unicodetools/data/linkification/dev/LinkFormattingTest.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# LinkFormattingTest.txt
2-
# Date: 2025-12-11, 02:19:40 GMT
2+
# Date: 2025-12-12, 00:48:36 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html

unicodetools/data/linkification/dev/LinkTerm.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# LinkTerm.txt
2-
# Date: 2025-12-11, 02:19:40 GMT
2+
# Date: 2025-12-12, 00:48:36 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html

unicodetools/src/main/java/org/unicode/tools/GenerateLinkData.java

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -196,6 +196,7 @@ static void writeTestHeader(
196196

197197
/** Generate property data for the UTS */
198198
static void generatePropertyData() {
199+
System.out.println("TLDs=\t" + Joiner.on(' ').join(LinkUtilities.TLDS));
199200

200201
BagFormatter bf = new BagFormatter(LinkUtilities.IUP).setLineSeparator("\n");
201202

@@ -211,6 +212,7 @@ static void generatePropertyData() {
211212
bf.showSetNames(out, propValue.base);
212213
out.println("");
213214
out.flush();
215+
System.out.println(propValue + "=\t" + propValue.base.toPattern(false));
214216
}
215217

216218
} catch (IOException e) {
@@ -237,7 +239,9 @@ static void generatePropertyData() {
237239
"LinkEmail",
238240
"Link_Email",
239241
UcdPropertyValues.Binary.No.toString());
240-
bf.showSetNames(out, LinkUtilities.LinkEmail.getSet(UcdPropertyValues.Binary.Yes));
242+
UnicodeSet linkEmailSet = LinkUtilities.LinkEmail.getSet(UcdPropertyValues.Binary.Yes);
243+
bf.showSetNames(out, linkEmailSet);
244+
System.out.println("LinkEmail=\t" + linkEmailSet.toPattern(false));
241245
} catch (IOException e) {
242246
throw new UncheckedIOException(e);
243247
}

unicodetools/src/main/java/org/unicode/utilities/LinkUtilities.java

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,15 @@
55
import com.google.common.collect.ComparisonChain;
66
import com.google.common.collect.ImmutableSet;
77
import com.google.common.collect.ImmutableSortedMap;
8+
import com.google.common.collect.ImmutableSortedSet;
89
import com.google.common.collect.Sets;
910
import com.google.common.util.concurrent.UncheckedExecutionException;
1011
import com.ibm.icu.impl.IDNA2003;
1112
import com.ibm.icu.impl.UnicodeMap;
1213
import com.ibm.icu.lang.UCharacter;
1314
import com.ibm.icu.lang.UProperty;
1415
import com.ibm.icu.lang.UProperty.NameChoice;
16+
import com.ibm.icu.text.Collator;
1517
import com.ibm.icu.text.StringPrepParseException;
1618
import com.ibm.icu.text.UTF16;
1719
import com.ibm.icu.text.UnicodeSet;
@@ -27,10 +29,12 @@
2729
import java.util.EnumSet;
2830
import java.util.HashMap;
2931
import java.util.List;
32+
import java.util.Locale;
3033
import java.util.Map;
3134
import java.util.Map.Entry;
3235
import java.util.NavigableMap;
3336
import java.util.Set;
37+
import java.util.SortedSet;
3438
import java.util.Stack;
3539
import java.util.TreeSet;
3640
import java.util.regex.Matcher;
@@ -50,8 +54,6 @@
5054
import org.unicode.utilities.LinkUtilities.LinkScanner;
5155

5256
public class LinkUtilities {
53-
private static final boolean SHOW_NON_ASCII_TLDS = true;
54-
5557
// allow changing UnicodeSet to use the current IndexUnicodeProperties
5658
public static final IndexUnicodeProperties IUP =
5759
IndexUnicodeProperties.make(VersionInfo.UNICODE_17_0);
@@ -830,6 +832,8 @@ public void showLinkPairedOpeners() {
830832
*/
831833
public static final Pattern TLD_SCANNER;
832834

835+
public static final SortedSet<String> TLDS;
836+
833837
public static final String DOTSET_STRING = "[.。]";
834838
public static final UnicodeSet DOTSET = new UnicodeSet("[.。]").freeze();
835839
public static final Splitter SPLIT_LABELS = Splitter.on(Pattern.compile("[.。]"));
@@ -870,10 +874,13 @@ public int compare(String o1, String o2) {
870874
}
871875
});
872876
String pattern = "(?u)" + DOTSET_STRING + "(" + Joiner.on('|').join(core) + ")";
877+
TLDS =
878+
core.stream()
879+
.map(x -> UCharacter.toLowerCase(x))
880+
.collect(
881+
ImmutableSortedSet.toImmutableSortedSet(
882+
Collator.getInstance(Locale.ROOT)));
873883
TLD_SCANNER = Pattern.compile(pattern);
874-
if (SHOW_NON_ASCII_TLDS) {
875-
System.out.println(nonAscii);
876-
}
877884
} catch (IOException e) {
878885
throw new UncheckedIOException(e);
879886
}

0 commit comments

Comments
 (0)