Skip to content

Commit d7c6a23

Browse files
Glavo and Copilot authored
通过解析 IANA 语言子标签注册表增强本地化功能 (#4675)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 27e1e02 commit d7c6a23

File tree

12 files changed

+667
-272
lines changed

12 files changed

+667
-272
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ hmcl-exported-logs-*
1919
/.local/
2020
/.cache/
2121

22+
# IANA Language Subtag Registry
23+
language-subtag-registry
24+
2225
# gradle build
2326
/build/
2427
/HMCL/build/

HMCLCore/src/main/java/org/jackhuang/hmcl/util/i18n/DefaultResourceBundleControl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
/// - For all Chinese locales, `zh-CN` is always added to the candidate list. If `zh-Hans` already exists in the candidate list,
3131
/// `zh-CN` is inserted before `zh`; otherwise, it is inserted after `zh`.
3232
/// - For all Traditional Chinese locales, `zh-TW` is always added to the candidate list (before `zh`).
33-
/// - For all [supported][LocaleUtils#mapToISO2Language(String)] ISO 639-3 language code (such as `eng`, `zho`, `lzh`, etc.),
33+
/// - For every supported ISO 639-3 language code (such as `eng`, `zho`, `lzh`, etc.),
3434
/// a candidate list with the language code replaced by the ISO 639-1 (Macro)language code is added to the end of the candidate list.
3535
///
3636
/// @author Glavo

HMCLCore/src/main/java/org/jackhuang/hmcl/util/i18n/LocaleUtils.java

Lines changed: 110 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@
1717
*/
1818
package org.jackhuang.hmcl.util.i18n;
1919

20-
import org.jackhuang.hmcl.util.Lang;
2120
import org.jackhuang.hmcl.util.StringUtils;
22-
import org.jackhuang.hmcl.util.io.IOUtils;
2321
import org.jackhuang.hmcl.util.platform.NativeUtils;
2422
import org.jackhuang.hmcl.util.platform.OperatingSystem;
2523
import org.jackhuang.hmcl.util.platform.windows.Kernel32;
@@ -29,6 +27,8 @@
2927
import org.jetbrains.annotations.Unmodifiable;
3028

3129
import java.io.IOException;
30+
import java.io.InputStream;
31+
import java.nio.charset.StandardCharsets;
3232
import java.nio.file.Files;
3333
import java.nio.file.Path;
3434
import java.time.Duration;
@@ -48,66 +48,74 @@ public final class LocaleUtils {
4848

4949
public static final Locale SYSTEM_DEFAULT = Locale.getDefault();
5050

51-
public static final Locale LOCALE_ZH_HANS = Locale.forLanguageTag("zh-Hans");
52-
public static final Locale LOCALE_ZH_HANT = Locale.forLanguageTag("zh-Hant");
53-
54-
public static final String DEFAULT_LANGUAGE_KEY = "default";
51+
public static final boolean IS_CHINA_MAINLAND = isChinaMainland();
5552

56-
private static final Map<String, String> subLanguageToParent = new HashMap<>();
57-
private static final Map<String, String> iso3To2 = new HashMap<>();
58-
private static final Set<String> rtl = new HashSet<>();
53+
private static boolean isChinaMainland() {
54+
if ("Asia/Shanghai".equals(ZoneId.systemDefault().getId()))
55+
return true;
5956

60-
static {
61-
try {
62-
for (String line : Lang.toIterable(IOUtils.readFullyAsString(LocaleUtils.class.getResourceAsStream("/assets/lang/sublanguages.csv")).lines())) {
63-
if (line.startsWith("#") || line.isBlank()) {
64-
continue;
65-
}
57+
// Check if the time zone is UTC+8
58+
if (ZonedDateTime.now().getOffset().getTotalSeconds() == Duration.ofHours(8).toSeconds()) {
59+
if ("CN".equals(LocaleUtils.SYSTEM_DEFAULT.getCountry()))
60+
return true;
6661

67-
String[] languages = line.split(",");
68-
if (languages.length < 2) {
69-
LOG.warning("Invalid line in sublanguages.csv: " + line);
70-
continue;
71-
}
62+
if (OperatingSystem.CURRENT_OS == OperatingSystem.WINDOWS && NativeUtils.USE_JNA) {
63+
Kernel32 kernel32 = Kernel32.INSTANCE;
7264

73-
String parent = languages[0];
74-
for (int i = 1; i < languages.length; i++) {
75-
subLanguageToParent.put(languages[i], parent);
76-
}
65+
// https://learn.microsoft.com/windows/win32/intl/table-of-geographical-locations
66+
if (kernel32 != null && kernel32.GetUserGeoID(WinConstants.GEOCLASS_NATION) == 45) // China
67+
return true;
7768
}
78-
} catch (Throwable e) {
79-
LOG.warning("Failed to load sublanguages.csv", e);
8069
}
8170

82-
try {
83-
// Line Format: (?<iso2>[a-z]{2}),(?<iso3>[a-z]{3})
84-
for (String line : Lang.toIterable(IOUtils.readFullyAsString(LocaleUtils.class.getResourceAsStream("/assets/lang/iso_languages.csv")).lines())) {
85-
if (line.startsWith("#") || line.isBlank()) {
86-
continue;
87-
}
71+
return false;
72+
}
8873

89-
String[] parts = line.split(",", 3);
90-
if (parts.length != 2) {
91-
LOG.warning("Invalid line in iso_languages.csv: " + line);
92-
continue;
93-
}
74+
public static final Locale LOCALE_ZH_HANS = Locale.forLanguageTag("zh-Hans");
75+
public static final Locale LOCALE_ZH_HANT = Locale.forLanguageTag("zh-Hant");
9476

95-
iso3To2.put(parts[1], parts[0]);
96-
}
97-
} catch (Throwable e) {
98-
LOG.warning("Failed to load iso_languages.csv", e);
77+
public static final String DEFAULT_LANGUAGE_KEY = "default";
78+
79+
private static final Map<String, String> PARENT_LANGUAGE = loadCSV("sublanguages.csv");
80+
private static final Map<String, String> NORMALIZED_TAG = loadCSV("language_aliases.csv");
81+
private static final Map<String, String> DEFAULT_SCRIPT = loadCSV("default_script.csv");
82+
private static final Map<String, String> PREFERRED_LANGUAGE = Map.of("zh", "cmn");
83+
private static final Set<String> RTL_SCRIPTS = Set.of("Qabs", "Arab", "Hebr");
84+
private static final Set<String> CHINESE_TRADITIONAL_REGIONS = Set.of("TW", "HK", "MO");
85+
86+
/// Load CSV files located in `/assets/lang/`.
87+
/// Each line in these files contains at least two elements.
88+
///
89+
/// For example, if a file contains `value0,value1,value2`, the return value will be `{value1=value0, value2=value0}`.
90+
private static Map<String, String> loadCSV(String fileName) {
91+
InputStream resource = LocaleUtils.class.getResourceAsStream("/assets/lang/" + fileName);
92+
if (resource == null) {
93+
LOG.warning("Can't find file: " + fileName);
94+
return Map.of();
9995
}
10096

101-
try {
102-
for (String line : Lang.toIterable(IOUtils.readFullyAsString(LocaleUtils.class.getResourceAsStream("/assets/lang/rtl.txt")).lines())) {
103-
if (line.startsWith("#") || line.isBlank()) {
104-
continue;
97+
HashMap<String, String> result = new HashMap<>();
98+
try (resource) {
99+
new String(resource.readAllBytes(), StandardCharsets.UTF_8).lines().forEach(line -> {
100+
if (line.startsWith("#") || line.isBlank())
101+
return;
102+
103+
String[] items = line.split(",");
104+
if (items.length < 2) {
105+
LOG.warning("Invalid line in " + fileName + ": " + line);
106+
return;
105107
}
106-
rtl.add(line.trim());
107-
}
108+
109+
String parent = items[0];
110+
for (int i = 1; i < items.length; i++) {
111+
result.put(items[i], parent);
112+
}
113+
});
108114
} catch (Throwable e) {
109-
LOG.warning("Failed to load rtl.txt", e);
115+
LOG.warning("Failed to load " + fileName, e);
110116
}
117+
118+
return Map.copyOf(result);
111119
}
112120

113121
private static Locale getInstance(String language, String script, String region,
@@ -130,6 +138,31 @@ public static String toLanguageKey(Locale locale) {
130138
: locale.stripExtensions().toLanguageTag();
131139
}
132140

141+
public static boolean isEnglish(Locale locale) {
142+
return "en".equals(getRootLanguage(locale));
143+
}
144+
145+
public static boolean isChinese(Locale locale) {
146+
return "zh".equals(getRootLanguage(locale));
147+
}
148+
149+
// ---
150+
151+
/// Normalize the language code to the code in the IANA Language Subtag Registry.
152+
/// Typically, it normalizes ISO 639 alpha-3 codes to ISO 639 alpha-2 codes.
153+
public static @NotNull String normalizeLanguage(String language) {
154+
return language.isEmpty()
155+
? "en"
156+
: NORMALIZED_TAG.getOrDefault(language, language);
157+
}
158+
159+
/// If `language` is a sublanguage of a [macrolanguage](https://en.wikipedia.org/wiki/ISO_639_macrolanguage),
160+
/// return the macrolanguage; otherwise, return `null`.
161+
public static @Nullable String getParentLanguage(String language) {
162+
return PARENT_LANGUAGE.get(language);
163+
}
164+
165+
/// @see #getRootLanguage(String)
133166
public static @NotNull String getRootLanguage(Locale locale) {
134167
return getRootLanguage(locale.getLanguage());
135168
}
@@ -140,54 +173,54 @@ public static String toLanguageKey(Locale locale) {
140173
/// - If `language` is empty, return `en`;
141174
/// - Otherwise, return the `language`.
142175
public static @NotNull String getRootLanguage(String language) {
143-
if (language.isEmpty()) return "en";
144-
if (language.length() <= 2)
145-
return language;
146-
147-
String iso2 = mapToISO2Language(language);
148-
if (iso2 != null)
149-
return iso2;
176+
language = normalizeLanguage(language);
150177

151178
String parent = getParentLanguage(language);
152179
return parent != null ? parent : language;
153180
}
154181

182+
/// If `language` is a macrolanguage, try to map it to the most commonly used individual language.
183+
///
184+
/// For example, if `language` is `zh`, this method will return `cmn`.
185+
public static @NotNull String getPreferredLanguage(String language) {
186+
language = normalizeLanguage(language);
187+
return PREFERRED_LANGUAGE.getOrDefault(language, language);
188+
}
189+
155190
/// Get the script of the locale. If the script is empty, it is inferred from the variant, the region (`UD`),
156191
/// or the language (for Chinese, from the region as well); an empty string is returned if nothing matches.
157192
public static @NotNull String getScript(Locale locale) {
158193
if (locale.getScript().isEmpty()) {
159-
if (isEnglish(locale)) {
160-
if ("UD".equals(locale.getCountry())) {
161-
return "Qabs";
162-
}
194+
if (!locale.getVariant().isEmpty()) {
195+
String script = DEFAULT_SCRIPT.get(locale.getVariant());
196+
if (script != null)
197+
return script;
163198
}
164199

200+
if ("UD".equals(locale.getCountry())) {
201+
return "Qabs";
202+
}
203+
204+
String script = DEFAULT_SCRIPT.get(normalizeLanguage(locale.getLanguage()));
205+
if (script != null)
206+
return script;
207+
165208
if (isChinese(locale)) {
166-
if (CHINESE_LATN_VARIANTS.contains(locale.getVariant()))
167-
return "Latn";
168-
if (locale.getLanguage().equals("lzh") || CHINESE_TRADITIONAL_REGIONS.contains(locale.getCountry()))
169-
return "Hant";
170-
else
171-
return "Hans";
209+
return CHINESE_TRADITIONAL_REGIONS.contains(locale.getCountry())
210+
? "Hant"
211+
: "Hans";
172212
}
213+
214+
return "";
173215
}
174216

175217
return locale.getScript();
176218
}
177219

178220
public static @NotNull TextDirection getTextDirection(Locale locale) {
179-
TextDirection direction = rtl.contains(getRootLanguage(locale))
221+
return RTL_SCRIPTS.contains(getScript(locale))
180222
? TextDirection.RIGHT_TO_LEFT
181223
: TextDirection.LEFT_TO_RIGHT;
182-
183-
if ("Qabs".equals(getScript(locale))) {
184-
direction = switch (direction) {
185-
case RIGHT_TO_LEFT -> TextDirection.LEFT_TO_RIGHT;
186-
case LEFT_TO_RIGHT -> TextDirection.RIGHT_TO_LEFT;
187-
};
188-
}
189-
190-
return direction;
191224
}
192225

193226
private static final ConcurrentMap<Locale, List<Locale>> CANDIDATE_LOCALES = new ConcurrentHashMap<>();
@@ -196,13 +229,8 @@ public static String toLanguageKey(Locale locale) {
196229
return CANDIDATE_LOCALES.computeIfAbsent(locale, LocaleUtils::createCandidateLocaleList);
197230
}
198231

199-
// -------------
200-
201232
private static List<Locale> createCandidateLocaleList(Locale locale) {
202-
String language = locale.getLanguage();
203-
if (language.isEmpty())
204-
return List.of(Locale.ENGLISH, Locale.ROOT);
205-
233+
String language = getPreferredLanguage(locale.getLanguage());
206234
String script = getScript(locale);
207235
String region = locale.getCountry();
208236
List<String> variants = locale.getVariant().isEmpty()
@@ -211,18 +239,7 @@ private static List<Locale> createCandidateLocaleList(Locale locale) {
211239

212240
ArrayList<Locale> result = new ArrayList<>();
213241
do {
214-
String currentLanguage;
215-
216-
if (language.length() <= 2) {
217-
currentLanguage = language;
218-
} else {
219-
String iso2 = mapToISO2Language(language);
220-
currentLanguage = iso2 != null
221-
? iso2
222-
: language;
223-
}
224-
225-
addCandidateLocales(result, currentLanguage, script, region, variants);
242+
addCandidateLocales(result, language, script, region, variants);
226243
} while ((language = getParentLanguage(language)) != null);
227244

228245
result.add(Locale.ROOT);
@@ -367,54 +384,6 @@ else if (fileName.length() > defaultFileNameLength + 1 && fileName.charAt(baseNa
367384
return Map.of();
368385
}
369386

370-
// ---
371-
372-
/// Map ISO 639 alpha-3 language codes to ISO 639 alpha-2 language codes.
373-
/// Returns `null` if there is no corresponding ISO 639 alpha-2 language code.
374-
public static @Nullable String mapToISO2Language(String iso3Language) {
375-
return iso3To2.get(iso3Language);
376-
}
377-
378-
/// If `language` is a sublanguage of a [macrolanguage](https://en.wikipedia.org/wiki/ISO_639_macrolanguage),
379-
/// return the macrolanguage; otherwise, return `null`.
380-
public static @Nullable String getParentLanguage(String language) {
381-
return subLanguageToParent.get(language);
382-
}
383-
384-
public static boolean isEnglish(Locale locale) {
385-
return "en".equals(getRootLanguage(locale));
386-
}
387-
388-
public static final Set<String> CHINESE_TRADITIONAL_REGIONS = Set.of("TW", "HK", "MO");
389-
public static final Set<String> CHINESE_LATN_VARIANTS = Set.of("pinyin", "wadegile", "tongyong");
390-
391-
public static boolean isChinese(Locale locale) {
392-
return "zh".equals(getRootLanguage(locale));
393-
}
394-
395-
public static final boolean IS_CHINA_MAINLAND = isChinaMainland();
396-
397-
private static boolean isChinaMainland() {
398-
if ("Asia/Shanghai".equals(ZoneId.systemDefault().getId()))
399-
return true;
400-
401-
// Check if the time zone is UTC+8
402-
if (ZonedDateTime.now().getOffset().getTotalSeconds() == Duration.ofHours(8).toSeconds()) {
403-
if ("CN".equals(LocaleUtils.SYSTEM_DEFAULT.getCountry()))
404-
return true;
405-
406-
if (OperatingSystem.CURRENT_OS == OperatingSystem.WINDOWS && NativeUtils.USE_JNA) {
407-
Kernel32 kernel32 = Kernel32.INSTANCE;
408-
409-
// https://learn.microsoft.com/windows/win32/intl/table-of-geographical-locations
410-
if (kernel32 != null && kernel32.GetUserGeoID(WinConstants.GEOCLASS_NATION) == 45) // China
411-
return true;
412-
}
413-
}
414-
415-
return false;
416-
}
417-
418387
private LocaleUtils() {
419388
}
420389
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
Arab,ar,fa,ps,ur
2+
Armn,hy
3+
Beng,as,bn
4+
Blis,zbl
5+
Cyrl,ab,be,bg,kk,mk,ru,uk
6+
Deva,hi,mr,ne,kok,mai
7+
Ethi,am,ti
8+
Geor,ka
9+
Grek,el
10+
Gujr,gu
11+
Guru,pa
12+
Hant,lzh
13+
Hebr,he,yi
14+
Jpan,ja
15+
Khmr,km
16+
Knda,kn
17+
Kore,ko
18+
Laoo,lo
19+
Latn,af,ay,bs,ca,ch,cs,cy,da,de,en,eo,es,et,eu,fi,fj,fo,fr,fy,ga,gl,gn,gv,hr,ht,hu,id,is,it,kl,la,lb,ln,lt,lv,mg,mh,ms,mt,na,nb,nd,nl,nn,no,nr,ny,om,pl,pt,qu,rm,rn,ro,rw,sg,sk,sl,sm,so,sq,ss,st,sv,sw,tl,tn,to,tr,ts,ve,vi,xh,zu,dsb,frr,frs,gsw,hsb,men,nds,niu,nso,tem,tkl,tmh,tpi,tvl,tailo,pinyin,hepburn,pehoeji,tongyong,wadegile
20+
Mlym,ml
21+
Mymr,my
22+
Nkoo,nqo
23+
Orya,or
24+
Sinh,si
25+
Taml,ta
26+
Telu,te
27+
Thaa,dv
28+
Thai,th
29+
Tibt,dz

0 commit comments

Comments
 (0)