|
# Load the G2P language table from the JSON file whose path is stored under
# config["G2P"]["LANGUAGE_JSON"].
# The encoding is pinned to UTF-8 (the encoding JSON text is specified to use)
# so that decoding does not depend on the platform's locale default.
with open(config.get("G2P").get("LANGUAGE_JSON"), encoding="utf-8") as f:
    G2P_LANGCODES = json.load(f)

# Inverse lookup table: every value of G2P_LANGCODES becomes a key that maps
# back to its original key (presumably language name -> language code; verify
# against the JSON file). If two keys share a value, the later one wins.
G2P_LANGUAGES: dict = {value: key for key, value in G2P_LANGCODES.items()}
| 8 | + |
# Static lookup table from a language/script code string to a human-readable
# display name.
#
# Keys have the shape "<3-letter language>-<4-letter script>" with an optional
# "-<variant>" suffix (e.g. "amh-Ethi", "amh-Ethi-red", "lao-Laoo-prereform").
# NOTE(review): presumably these are the mapping codes accepted by the Epitran
# library - confirm against the Epitran version in use.
#
# Display names are NOT unique: e.g. "jpn-Hrgn" and "jpn-Hira" both map to
# "Japanese (Hiragana)", so inverting this dict would silently drop entries.
#
# NOTE(review): the trailing "*" and "†" on some names look like footnote
# markers carried over from an upstream table; they are part of the runtime
# strings, so they are left untouched here.
EPITRAN_LANGCODES = {
    "aar-Latn": "Afar",
    "afr-Latn": "Afrikaans",
    "aii-Syrc": "Assyrian Neo-Aramaic",
    "amh-Ethi": "Amharic",
    "amh-Ethi-pp": "Amharic (more phonetic)",
    "amh-Ethi-red": "Amharic (reduced)",
    "ara-Arab": "Literary Arabic",
    "ava-Cyrl": "Avaric",
    "aze-Cyrl": "Azerbaijani (Cyrillic)",
    "aze-Latn": "Azerbaijani",
    "ben-Beng": "Bengali",
    "ben-Beng-red": "Bengali (reduced)",
    "ben-Beng-east": "Eastern Bengali",
    "bho-Deva": "Bhojpuri",
    "bxk-Latn": "Bukusu",
    "cat-Latn": "Catalan",
    "ceb-Latn": "Cebuano",
    "ces-Latn": "Czech",
    "cjy-Latn": "Jin (Wiktionary)",
    "ckb-Arab": "Sorani",
    "cmn-Hans": "Mandarin (Simplified)*",
    "cmn-Hant": "Mandarin (Traditional)*",
    "cmn-Latn": "Mandarin (Pinyin)*",
    "csb-Latn": "Kashubian",
    "deu-Latn": "German",
    "deu-Latn-np": "German†",
    "deu-Latn-nar": "German (more phonetic)",
    "eng-Latn": "English",
    "epo-Latn": "Esperanto",
    "est-Latn": "Estonian",
    "fas-Arab": "Farsi (Perso-Arabic)",
    "fin-Latn": "Finnish",
    "fra-Latn": "French",
    "fra-Latn-np": "French†",
    "fra-Latn-p": "French (more phonetic)",
    "ful-Latn": "Fulah",
    "gan-Latn": "Gan (Wiktionary)",
    "glg-Latn": "Galician",
    "got-Goth": "Gothic",
    "got-Latn": "Gothic (Latin)",
    "hak-Latn": "Hakka (pha̍k-fa-sṳ)",
    "hat-Latn-bab": "Haitian (Latin-Babel)",
    "hau-Latn": "Hausa",
    "hin-Deva": "Hindi",
    "hmn-Latn": "Hmong",
    "hrv-Latn": "Croatian",
    "hsn-Latn": "Xiang (Wiktionary)",
    "hun-Latn": "Hungarian",
    "ilo-Latn": "Ilocano",
    "ind-Latn": "Indonesian",
    "ita-Latn": "Italian",
    "jam-Latn": "Jamaican",
    "jav-Latn": "Javanese",
    "jpn-Hrgn": "Japanese (Hiragana)",
    "jpn-Hrgn-red": "Japanese (Hiragana, reduced)",
    "jpn-Ktkn": "Japanese (Katakana)",
    "jpn-Ktkn-red": "Japanese (Katakana, reduced)",
    "jpn-Jpan": "Japanese (Hiragana, Katakana, Kanji)",
    "jpn-Hira": "Japanese (Hiragana)",
    "jpn-Hira-red": "Japanese (Hiragana, reduced)",
    "jpn-Kana": "Japanese (Katakana)",
    "jpn-Kana-red": "Japanese (Katakana, reduced)",
    "kat-Geor": "Georgian",
    "kaz-Cyrl": "Kazakh (Cyrillic)",
    "kaz-Cyrl-bab": "Kazakh (Cyrillic—Babel)",
    "kaz-Latn": "Kazakh (Latin)",
    "kbd-Cyrl": "Kabardian",
    "khm-Khmr": "Khmer",
    "kin-Latn": "Kinyarwanda",
    "kir-Arab": "Kyrgyz (Perso-Arabic)",
    "kir-Cyrl": "Kyrgyz (Cyrillic)",
    "kir-Latn": "Kyrgyz (Latin)",
    "kmr-Latn": "Kurmanji",
    "kmr-Latn-red": "Kurmanji (reduced)",
    "kor-Hang": "Korean",
    "lao-Laoo": "Lao",
    "lao-Laoo-prereform": "Lao (Before spelling reform)",
    "lav-Latn": "Latvian",
    "lez-Cyrl": "Lezgian",
    "lij-Latn": "Ligurian",
    "lit-Latn": "Lithuanian",
    "lsm-Latn": "Saamia",
    "ltc-Latn-bax": "Middle Chinese (Baxter and Sagart 2014)",
    "lug-Latn": "Ganda / Luganda",
    "mal-Mlym": "Malayalam",
    "mar-Deva": "Marathi",
    "mlt-Latn": "Maltese",
    "mon-Cyrl-bab": "Mongolian (Cyrillic)",
    "mri-Latn": "Maori",
    "msa-Latn": "Malay",
    "mya-Mymr": "Burmese",
    "nan-Latn": "Hokkien (pe̍h-oē-jī)",
    "nan-Latn-tl": "Hokkien (Tâi-lô)",
    "nld-Latn": "Dutch",
    "nya-Latn": "Chichewa",
    "ood-Latn-alv": "Tohono O'odham (Alvarez-Hale)",
    "ood-Latn-sax": "Tohono O'odham (Saxton)",
    "ori-Orya": "Odia",
    "orm-Latn": "Oromo",
    "pan-Guru": "Punjabi (Eastern)",
    "pol-Latn": "Polish",
    "por-Latn": "Portuguese",
    "quy-Latn": "Ayacucho Quechua / Quechua Chanka",
    "ron-Latn": "Romanian",
    "run-Latn": "Rundi",
    "rus-Cyrl": "Russian",
    "sag-Latn": "Sango",
    "sin-Sinh": "Sinhala",
    "slv-Latn": "Slovene / Slovenian",
    "sna-Latn": "Shona",
    "som-Latn": "Somali",
    "spa-Latn": "Spanish",
    "spa-Latn-eu": "Spanish (Iberian)",
    "sqi-Latn": "Albanian",
    "sro-Latn": "Sardinian (Campidanese)",
    "srp-Latn": "Serbian (Latin)",
    "srp-Cyrl": "Serbian (Cyrillic)",
    "swa-Latn": "Swahili",
    "swa-Latn-red": "Swahili (reduced)",
    "swe-Latn": "Swedish",
    "tam-Taml": "Tamil",
    "tam-Taml-red": "Tamil (reduced)",
    "tel-Telu": "Telugu",
    "tgk-Cyrl": "Tajik",
    "tgl-Latn": "Tagalog",
    "tgl-Latn-red": "Tagalog (reduced)",
    "tha-Thai": "Thai",
    "tir-Ethi": "Tigrinya",
    "tir-Ethi-pp": "Tigrinya (more phonemic)",
    "tir-Ethi-red": "Tigrinya (reduced)",
    "tok-Latn": "Toki Pona",
    "tpi-Latn": "Tok Pisin",
    "tuk-Cyrl": "Turkmen (Cyrillic)",
    "tuk-Latn": "Turkmen (Latin)",
    "tur-Latn": "Turkish (Latin)",
    "tur-Latn-bab": "Turkish (Latin—Babel)",
    "tur-Latn-red": "Turkish (reduced)",
    "ukr-Cyrl": "Ukrainian",
    "urd-Arab": "Urdu",
    "uig-Arab": "Uyghur (Perso-Arabic)",
    "uzb-Cyrl": "Uzbek (Cyrillic)",
    "uzb-Latn": "Uzbek (Latin)",
    "vie-Latn": "Vietnamese",
    "wuu-Latn": "Shanghainese Wu (Wiktionary)",
    "xho-Latn": "Xhosa",
    "yor-Latn": "Yoruba",
    "yue-Latn": "Cantonese (Jyutping)",
    "yue-Hant": "Cantonese (Character)",
    "zha-Latn": "Zhuang",
    "zul-Latn": "Zulu",
}
0 commit comments