Skip to content

Commit 012d36c

Browse files
committed
BL-14201 improve macrolanguage handling
1 parent b32c2df commit 012d36c

File tree

17 files changed

+455
-331
lines changed

17 files changed

+455
-331
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"autonyms",
1212
"castellano",
1313
"ethnolib",
14+
"Ethnologue",
1415
"exonym",
1516
"langtag",
1617
"langtags",
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// TODO Revisit this. There are a whole bunch more (maybe 2x more)
2+
// languages marked "historical" which we should possibly also filter out; we should
3+
// consider instead using a whitelist of historical languages incl. ancient greeks,
4+
// hebrews, latin, etc. and then removing all other languages marked "historical".
5+
// If we could figure out what Ethnologue includes and copy that, that might be ideal.
6+
// Looks like it includes the various ancient greek codes but not Old English etc.
7+
//
8+
// Neither the current Bloom language picker nor mui-language-picker (Audio Project Manager) exclude
9+
// historical languages like old english.
10+
//
11+
// TODO also generate DEFAULT_EXCLUDED_HISTORIC_LANGUAGE_CODES dynamically.
12+
13+
// languages with ISO 639-3 "historic" language type and "Old", "Middle", "Ancient", "Classical" in their name (exonym)
14+
// except for Ancient Greek (grc), Ancient Hebrew (hbo), Old Aramaic (up to 700 BCE) (oar)
15+
/**
 * Three-letter language codes excluded by default as historic languages.
 *
 * This is a hard-coded list (a TODO in this file proposes generating it
 * dynamically). Each entry is annotated with the language's English name.
 * Ancient Greek (grc), Ancient Hebrew (hbo) and Old Aramaic (oar) are
 * deliberately absent, so they are not excluded.
 *
 * NOTE(review): where and how this set is consumed is not visible here —
 * presumably callers test a language's ISO 639-3 code against it; confirm.
 */
export const DEFAULT_EXCLUDED_HISTORIC_LANGUAGE_CODES = new Set([
16+
"ang", // Old English (ca. 450-1100)
17+
"axm", // Middle Armenian
18+
"cmg", // Classical Mongolian
19+
"cnx", // Middle Cornish
20+
"dum", // Middle Dutch (ca. 1050-1350)
21+
"egy", // Egyptian (Ancient)
22+
"enm", // Middle English (1100-1500)
23+
"frm", // Middle French (ca. 1400-1600)
24+
"fro", // Old French (842-ca. 1400)
25+
"gmh", // Middle High German (ca. 1050-1500)
26+
"gml", // Middle Low German
27+
"goh", // Old High German (ca. 750-1050)
28+
"htx", // Middle Hittite
29+
"ltc", // Late Middle Chinese
30+
"lzh", // Classical Chinese
31+
"mga", // Middle Irish (900-1200)
32+
"myz", // Classical Mandaic
33+
"nci", // Classical Nahuatl
34+
"non", // Old Norse
35+
"nwc", // Classical Newari
36+
"nwx", // Middle Newar
37+
"oav", // Old Avar
38+
"obr", // Old Burmese
39+
"obt", // Old Breton
40+
"och", // Old Chinese
41+
"ocm", // Old Cham
42+
"oco", // Old Cornish
43+
"odt", // Old Dutch
44+
"ofs", // Old Frisian
45+
"oge", // Old Georgian
46+
"oht", // Old Hittite
47+
"ohu", // Old Hungarian
48+
"ojp", // Old Japanese
49+
"okm", // Middle Korean (10th-16th cent.)
50+
"oko", // Old Korean (3rd-9th cent.)
51+
"okz", // Old Khmer
52+
"olt", // Old Lithuanian
53+
"omp", // Old Manipuri
54+
"omr", // Old Marathi
55+
"omx", // Old Mon
56+
"omy", // Old Malay
57+
"onw", // Old Nubian
58+
"oos", // Old Ossetic
59+
"orv", // Old Russian
60+
"osn", // Old Sundanese
61+
"osp", // Old Spanish
62+
"osx", // Old Saxon
63+
"otb", // Old Tibetan
64+
"otk", // Old Turkish
65+
"oty", // Old Tamil
66+
"oui", // Old Uighur
67+
"owl", // Old Welsh
68+
"peo", // Old Persian (ca. 600-400 B.C.)
69+
"pro", // Old Provençal (to 1500)
70+
"qwc", // Classical Quechua
71+
"sga", // Old Irish (to 900)
72+
"wlm", // Middle Welsh
73+
"xbm", // Middle Breton
74+
"xcl", // Classical Armenian
75+
"xct", // Classical Tibetan
76+
"xhm", // Middle Khmer (1400 to 1850 CE)
77+
"xlg", // Ligurian (Ancient)
78+
"xmk", // Ancient Macedonian
79+
"xmn", // Manichaean Middle Persian
80+
"xna", // Ancient North Arabian
81+
"xng", // Middle Mongolian
82+
"xzp", // Ancient Zapotec
83+
]);
84+
85+
// function hasOldKeyword(lang: ILanguage) {
86+
// for (const oldKeyword of ["Old", "Middle", "Ancient", "Classical"]) {
87+
// if (lang.exonym.includes(oldKeyword)) {
88+
// return true;
89+
// }
90+
// }
91+
// return false;
92+
// }
93+
94+
// for (const lang of reformattedLangs) {
95+
// if (lang.languageType === LanguageType.Historical && hasOldKeyword(lang)) {
96+
// console.log(lang.exonym, lang.iso639_3_code);
97+
// }
98+
// }

components/language-chooser/common/find-language/findLanguageInterfaces.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ export interface IScript {
88
name: string;
99
}
1010

11+
/**
 * Classification of a language, stored on `ILanguage.languageType`.
 *
 * This is a string enum: each member's runtime value is identical to its
 * name (e.g. `LanguageType.Living === "Living"`).
 *
 * NOTE(review): the members look like the ISO 639-3 language types plus an
 * `Unknown` fallback — confirm against the data source that populates it.
 */
export enum LanguageType {
12+
Ancient = "Ancient",
13+
Constructed = "Constructed",
14+
Extinct = "Extinct",
15+
Historical = "Historical",
16+
Living = "Living",
17+
Special = "Special",
18+
Unknown = "Unknown",
19+
}
20+
1121
export interface ILanguage {
1222
autonym?: string;
1323
exonym: string;
@@ -18,8 +28,8 @@ export interface ILanguage {
1828
scripts: IScript[];
1929
variants?: string; // comma-joined
2030
alternativeTags: string[];
21-
isForMacrolanguageDisambiguation?: boolean;
2231
isMacrolanguage?: boolean;
32+
languageType: LanguageType;
2333
// eslint-disable-next-line @typescript-eslint/no-explicit-any
2434
[key: string]: any; // allow indexing by string
2535
}

0 commit comments

Comments
 (0)