|
| 1 | +// TODO Revisit this. There are a whole bunch more (maybe 2x more) |
| 2 | +// languages marked "historical" which we should possibly also filter out; we should |
| 3 | +// consider instead using a whitelist of historical languages incl. Ancient Greek, |
| 4 | +// Ancient Hebrew, Latin, etc. and then removing all other languages marked "historical". |
| 5 | +// If we could figure out what Ethnologue includes and copy that, that might be ideal. |
| 6 | +// Looks like it includes the various ancient greek codes but not Old English etc. |
| 7 | +// |
| 8 | +// Neither the current Bloom language picker nor mui-language-picker (Audio Project Manager) excludes |
| 9 | +// historical languages like Old English. |
| 10 | +// |
| 11 | +// TODO also generate DEFAULT_EXCLUDED_HISTORIC_LANGUAGE_CODES dynamically. |
| 12 | + |
/**
 * ISO 639-3 codes of the historic languages that are excluded by default.
 *
 * The list covers languages whose ISO 639-3 language type is "historic" and whose
 * name (exonym) contains "Old", "Middle", "Ancient" or "Classical". Three such
 * languages are deliberately NOT listed: Ancient Greek (grc), Ancient Hebrew (hbo)
 * and Old Aramaic (up to 700 BCE) (oar).
 *
 * The codes are the keys of the table below, each paired with its exonym for
 * reference; the set contains the keys in the order they are written.
 */
export const DEFAULT_EXCLUDED_HISTORIC_LANGUAGE_CODES = new Set(
  Object.keys({
    ang: "Old English (ca. 450-1100)",
    axm: "Middle Armenian",
    cmg: "Classical Mongolian",
    cnx: "Middle Cornish",
    dum: "Middle Dutch (ca. 1050-1350)",
    egy: "Egyptian (Ancient)",
    enm: "Middle English (1100-1500)",
    frm: "Middle French (ca. 1400-1600)",
    fro: "Old French (842-ca. 1400)",
    gmh: "Middle High German (ca. 1050-1500)",
    gml: "Middle Low German",
    goh: "Old High German (ca. 750-1050)",
    htx: "Middle Hittite",
    ltc: "Late Middle Chinese",
    lzh: "Classical Chinese",
    mga: "Middle Irish (900-1200)",
    myz: "Classical Mandaic",
    nci: "Classical Nahuatl",
    non: "Old Norse",
    nwc: "Classical Newari",
    nwx: "Middle Newar",
    oav: "Old Avar",
    obr: "Old Burmese",
    obt: "Old Breton",
    och: "Old Chinese",
    ocm: "Old Cham",
    oco: "Old Cornish",
    odt: "Old Dutch",
    ofs: "Old Frisian",
    oge: "Old Georgian",
    oht: "Old Hittite",
    ohu: "Old Hungarian",
    ojp: "Old Japanese",
    okm: "Middle Korean (10th-16th cent.)",
    oko: "Old Korean (3rd-9th cent.)",
    okz: "Old Khmer",
    olt: "Old Lithuanian",
    omp: "Old Manipuri",
    omr: "Old Marathi",
    omx: "Old Mon",
    omy: "Old Malay",
    onw: "Old Nubian",
    oos: "Old Ossetic",
    orv: "Old Russian",
    osn: "Old Sundanese",
    osp: "Old Spanish",
    osx: "Old Saxon",
    otb: "Old Tibetan",
    otk: "Old Turkish",
    oty: "Old Tamil",
    oui: "Old Uighur",
    owl: "Old Welsh",
    peo: "Old Persian (ca. 600-400 B.C.)",
    pro: "Old Provençal (to 1500)",
    qwc: "Classical Quechua",
    sga: "Old Irish (to 900)",
    wlm: "Middle Welsh",
    xbm: "Middle Breton",
    xcl: "Classical Armenian",
    xct: "Classical Tibetan",
    xhm: "Middle Khmer (1400 to 1850 CE)",
    xlg: "Ligurian (Ancient)",
    xmk: "Ancient Macedonian",
    xmn: "Manichaean Middle Persian",
    xna: "Ancient North Arabian",
    xng: "Middle Mongolian",
    xzp: "Ancient Zapotec",
  })
);
| 84 | + |
| 85 | +// function hasOldKeyword(lang: ILanguage) { |
| 86 | +// for (const oldKeyword of ["Old", "Middle", "Ancient", "Classical"]) { |
| 87 | +// if (lang.exonym.includes(oldKeyword)) { |
| 88 | +// return true; |
| 89 | +// } |
| 90 | +// } |
| 91 | +// return false; |
| 92 | +// } |
| 93 | + |
| 94 | +// for (const lang of reformattedLangs) { |
| 95 | +// if (lang.languageType === LanguageType.Historical && hasOldKeyword(lang)) { |
| 96 | +// console.log(lang.exonym, lang.iso639_3_code); |
| 97 | +// } |
| 98 | +// } |
0 commit comments