|
5 | 5 | import com.google.common.collect.ComparisonChain; |
6 | 6 | import com.google.common.collect.ImmutableSet; |
7 | 7 | import com.google.common.collect.ImmutableSortedMap; |
| 8 | +import com.google.common.collect.ImmutableSortedSet; |
8 | 9 | import com.google.common.collect.Sets; |
9 | 10 | import com.google.common.util.concurrent.UncheckedExecutionException; |
10 | 11 | import com.ibm.icu.impl.IDNA2003; |
11 | 12 | import com.ibm.icu.impl.UnicodeMap; |
12 | 13 | import com.ibm.icu.lang.UCharacter; |
13 | 14 | import com.ibm.icu.lang.UProperty; |
14 | 15 | import com.ibm.icu.lang.UProperty.NameChoice; |
| 16 | +import com.ibm.icu.text.Collator; |
15 | 17 | import com.ibm.icu.text.StringPrepParseException; |
16 | 18 | import com.ibm.icu.text.UTF16; |
17 | 19 | import com.ibm.icu.text.UnicodeSet; |
|
27 | 29 | import java.util.EnumSet; |
28 | 30 | import java.util.HashMap; |
29 | 31 | import java.util.List; |
| 32 | +import java.util.Locale; |
30 | 33 | import java.util.Map; |
31 | 34 | import java.util.Map.Entry; |
32 | 35 | import java.util.NavigableMap; |
33 | 36 | import java.util.Set; |
| 37 | +import java.util.SortedSet; |
34 | 38 | import java.util.Stack; |
35 | 39 | import java.util.TreeSet; |
36 | 40 | import java.util.regex.Matcher; |
|
50 | 54 | import org.unicode.utilities.LinkUtilities.LinkScanner; |
51 | 55 |
|
52 | 56 | public class LinkUtilities { |
53 | | - private static final boolean SHOW_NON_ASCII_TLDS = true; |
54 | | - |
55 | 57 | // allow changing UnicodeSet to use the current IndexUnicodeProperties |
56 | 58 | public static final IndexUnicodeProperties IUP = |
57 | 59 | IndexUnicodeProperties.make(VersionInfo.UNICODE_17_0); |
@@ -830,6 +832,8 @@ public void showLinkPairedOpeners() { |
830 | 832 | */ |
831 | 833 | public static final Pattern TLD_SCANNER; |
832 | 834 |
|
| 835 | + public static final SortedSet<String> TLDS; |
| 836 | + |
833 | 837 | public static final String DOTSET_STRING = "[.。]"; |
834 | 838 | public static final UnicodeSet DOTSET = new UnicodeSet("[.。]").freeze(); |
835 | 839 | public static final Splitter SPLIT_LABELS = Splitter.on(Pattern.compile("[.。]")); |
@@ -870,10 +874,13 @@ public int compare(String o1, String o2) { |
870 | 874 | } |
871 | 875 | }); |
872 | 876 | String pattern = "(?u)" + DOTSET_STRING + "(" + Joiner.on('|').join(core) + ")"; |
| 877 | + TLDS = |
| 878 | + core.stream() |
| 879 | + .map(x -> UCharacter.toLowerCase(x)) |
| 880 | + .collect( |
| 881 | + ImmutableSortedSet.toImmutableSortedSet( |
| 882 | + Collator.getInstance(Locale.ROOT))); |
873 | 883 | TLD_SCANNER = Pattern.compile(pattern); |
874 | | - if (SHOW_NON_ASCII_TLDS) { |
875 | | - System.out.println(nonAscii); |
876 | | - } |
877 | 884 | } catch (IOException e) { |
878 | 885 | throw new UncheckedIOException(e); |
879 | 886 | } |
|
0 commit comments