|
10 | 10 | # https://github.com/tesseract-ocr/tessdata). For the complete list, see http://www-01.sil.org/iso639-3/codes.asp |
11 | 11 |
|
12 | 12 | iso639_3 = { |
13 | | - 'Afar': 'aar', |
14 | | - 'Afrikaans': 'afr', |
15 | | - 'Akan': 'aka', |
16 | | - 'Albanian': 'als', |
17 | | - 'Amharic': 'amh', |
18 | | - 'Old English': 'ang', |
19 | | - 'Arabic': 'arb', |
20 | | - 'Egyptian Arabic': 'arz', |
21 | | - 'Assamese': 'asm', |
22 | | - 'Asturian': 'ast', |
23 | | - 'Azerbaijani': 'aze', |
24 | | - 'Bambara': 'bam', |
25 | | - 'Belarusian': 'bel', |
26 | | - 'Bengali': 'ben', |
27 | | - 'Tibetan': 'bod', |
28 | | - 'Bosnian': 'bos', |
29 | | - 'Breton': 'bre', |
30 | | - 'Bulgarian': 'bul', |
31 | | - 'Catalan': 'cat', |
32 | | - 'Czech': 'ces', |
33 | | - 'Cherokee': 'chr', |
34 | | - 'Simplified Chinese': 'cmn', |
35 | | - 'Cornish': 'cor', |
36 | | - 'Welsh': 'cym', |
37 | | - 'Danish': 'dan', |
38 | | - 'German': 'deu', |
39 | | - 'Dzongkha': 'dzo', |
40 | | - 'Greek': 'ell', |
41 | | - 'English': 'eng', |
42 | | - 'Esperanto': 'epo', |
43 | | - 'Estonian': 'est', |
44 | | - 'Basque': 'eus', |
45 | | - 'Ewe': 'ewe', |
46 | | - 'Faroese': 'fao', |
47 | | - 'Farsi': 'fas', |
48 | | - 'Finnish': 'fin', |
49 | | - 'French': 'fra', |
50 | | - 'Western Frisian': 'fry', |
51 | | - 'Fulah': 'ful', |
52 | | - 'Friulian': 'fur', |
53 | | - 'Scottish Gaelic': 'gla', |
54 | | - 'Irish': 'gle', |
55 | | - 'Galician': 'glg', |
56 | | - 'Manx': 'glv', |
57 | | - 'Ancient Greek': 'grc', |
58 | | - 'Gujarati': 'guj', |
59 | | - 'Haitian': 'hat', |
60 | | - 'Hausa': 'hau', |
61 | | - 'Serbo-Croatian': 'hbs', |
62 | | - 'Hebrew': 'heb', |
63 | | - 'Hindi': 'hin', |
64 | | - 'Croatian': 'hrv', |
65 | | - 'Hungarian': 'hun', |
66 | | - 'Armenian': 'hye', |
67 | | - 'Igbo': 'ibo', |
68 | | - 'Ido': 'ido', |
69 | | - 'Sichuan Yi': 'iii', |
70 | | - 'Interlingua': 'ina', |
71 | | - 'Indonesian': 'ind', |
72 | | - 'Icelandic': 'isl', |
73 | | - 'Italian': 'ita', |
74 | | - 'Japanese': 'jpn', |
75 | | - 'Kalaallisut': 'kal', |
76 | | - 'Kannada': 'kan', |
77 | | - 'Georgian': 'kat', |
78 | | - 'Kazakh': 'kaz', |
79 | | - 'Central Khmer': 'khm', |
80 | | - 'Kikuyu': 'kik', |
81 | | - 'Kinyarwanda': 'kin', |
82 | | - 'Kirghiz': 'kir', |
83 | | - 'Korean': 'kor', |
84 | | - 'Kurdish': 'kur', |
85 | | - 'Lao': 'lao', |
86 | | - 'Latin': 'lat', |
87 | | - 'Latvian': 'lav', |
88 | | - 'Lingala': 'lin', |
89 | | - 'Lithuanian': 'lit', |
90 | | - 'Latgalian': 'ltg', |
91 | | - 'Luxembourgish': 'ltz', |
92 | | - 'Luba-Katanga': 'lub', |
93 | | - 'Ganda': 'lug', |
94 | | - 'Malayalam': 'mal', |
95 | | - 'Marathi': 'mar', |
96 | | - 'Macedonian': 'mkd', |
97 | | - 'Malagasy': 'mlg', |
98 | | - 'Maltese': 'mlt', |
99 | | - 'Mongolian': 'mon', |
100 | | - 'Maori': 'mri', |
101 | | - 'Burmese': 'mya', |
102 | | - 'Min Nan Chinese': 'nan', |
103 | | - 'Navajo': 'nav', |
104 | | - 'South Ndebele': 'nbl', |
105 | | - 'North Ndebele': 'nde', |
106 | | - 'Nepali': 'nep', |
107 | | - 'Dutch': 'nld', |
108 | | - 'Nynorsk': 'nno', |
109 | | - 'Bokmål': 'nob', |
110 | | - 'Occitan': 'oci', |
111 | | - 'Oriya': 'ori', |
112 | | - 'Oromo': 'orm', |
113 | | - 'Panjabi': 'pan', |
114 | | - 'Polish': 'pol', |
115 | | - 'Portuguese': 'por', |
116 | | - 'Pushto': 'pus', |
117 | | - 'Traditional Chinese': 'qcn', |
118 | | - 'Romansh': 'roh', |
119 | | - 'Romanian': 'ron', |
120 | | - 'Rundi': 'run', |
121 | | - 'Macedo-Romanian': 'rup', |
122 | | - 'Russian': 'rus', |
123 | | - 'Sango': 'sag', |
124 | | - 'Sanskrit': 'san', |
125 | | - 'Sicilian': 'scn', |
126 | | - 'Sinhala': 'sin', |
127 | | - 'Slovak': 'slk', |
128 | | - 'Slovene': 'slv', |
129 | | - 'Northern Sami': 'sme', |
130 | | - 'Shona': 'sna', |
131 | | - 'Somali': 'som', |
132 | | - 'Southern Sotho': 'sot', |
133 | | - 'Spanish': 'spa', |
134 | | - 'Sardinian': 'srd', |
135 | | - 'Serbian': 'srp', |
136 | | - 'Swati': 'ssw', |
137 | | - 'Swahili': 'swa', |
138 | | - 'Swedish': 'swe', |
139 | | - 'Tamil': 'tam', |
140 | | - 'Tatar': 'tat', |
141 | | - 'Telugu': 'tel', |
142 | | - 'Tajik': 'tgk', |
143 | | - 'Tagalog': 'tgl', |
144 | | - 'Thai': 'tha', |
145 | | - 'Tigrinya': 'tir', |
146 | | - 'Tonga': 'ton', |
147 | | - 'Tswana': 'tsn', |
148 | | - 'Tsonga': 'tso', |
149 | | - 'Turkmen': 'tuk', |
150 | | - 'Turkish': 'tur', |
151 | | - 'Ukrainian': 'ukr', |
152 | | - 'Urdu': 'urd', |
153 | | - 'Uzbek': 'uzb', |
154 | | - 'Venda': 'ven', |
155 | | - 'Vietnamese': 'vie', |
156 | | - 'Volapük': 'vol', |
157 | | - 'Xhosa': 'xho', |
158 | | - 'Yiddish': 'yid', |
159 | | - 'Yoruba': 'yor', |
160 | | - 'Yue Chinese': 'yue', |
161 | | - 'Malaysian': 'zsm', |
162 | | - 'Zulu': 'zul', |
| 13 | + "Afar": "aar", |
| 14 | + "Afrikaans": "afr", |
| 15 | + "Akan": "aka", |
| 16 | + "Albanian": "als", |
| 17 | + "Amharic": "amh", |
| 18 | + "Old English": "ang", |
| 19 | + "Arabic": "arb", |
| 20 | + "Egyptian Arabic": "arz", |
| 21 | + "Assamese": "asm", |
| 22 | + "Asturian": "ast", |
| 23 | + "Azerbaijani": "aze", |
| 24 | + "Bambara": "bam", |
| 25 | + "Belarusian": "bel", |
| 26 | + "Bengali": "ben", |
| 27 | + "Tibetan": "bod", |
| 28 | + "Bosnian": "bos", |
| 29 | + "Breton": "bre", |
| 30 | + "Bulgarian": "bul", |
| 31 | + "Catalan": "cat", |
| 32 | + "Czech": "ces", |
| 33 | + "Cherokee": "chr", |
| 34 | + "Simplified Chinese": "cmn", |
| 35 | + "Cornish": "cor", |
| 36 | + "Welsh": "cym", |
| 37 | + "Danish": "dan", |
| 38 | + "German": "deu", |
| 39 | + "Dzongkha": "dzo", |
| 40 | + "Greek": "ell", |
| 41 | + "English": "eng", |
| 42 | + "Esperanto": "epo", |
| 43 | + "Estonian": "est", |
| 44 | + "Basque": "eus", |
| 45 | + "Ewe": "ewe", |
| 46 | + "Faroese": "fao", |
| 47 | + "Farsi": "fas", |
| 48 | + "Finnish": "fin", |
| 49 | + "French": "fra", |
| 50 | + "Western Frisian": "fry", |
| 51 | + "Fulah": "ful", |
| 52 | + "Friulian": "fur", |
| 53 | + "Scottish Gaelic": "gla", |
| 54 | + "Irish": "gle", |
| 55 | + "Galician": "glg", |
| 56 | + "Manx": "glv", |
| 57 | + "Ancient Greek": "grc", |
| 58 | + "Gujarati": "guj", |
| 59 | + "Haitian": "hat", |
| 60 | + "Hausa": "hau", |
| 61 | + "Serbo-Croatian": "hbs", |
| 62 | + "Hebrew": "heb", |
| 63 | + "Hindi": "hin", |
| 64 | + "Croatian": "hrv", |
| 65 | + "Hungarian": "hun", |
| 66 | + "Armenian": "hye", |
| 67 | + "Igbo": "ibo", |
| 68 | + "Ido": "ido", |
| 69 | + "Sichuan Yi": "iii", |
| 70 | + "Interlingua": "ina", |
| 71 | + "Indonesian": "ind", |
| 72 | + "Icelandic": "isl", |
| 73 | + "Italian": "ita", |
| 74 | + "Japanese": "jpn", |
| 75 | + "Kalaallisut": "kal", |
| 76 | + "Kannada": "kan", |
| 77 | + "Georgian": "kat", |
| 78 | + "Kazakh": "kaz", |
| 79 | + "Central Khmer": "khm", |
| 80 | + "Kikuyu": "kik", |
| 81 | + "Kinyarwanda": "kin", |
| 82 | + "Kirghiz": "kir", |
| 83 | + "Korean": "kor", |
| 84 | + "Kurdish": "kur", |
| 85 | + "Lao": "lao", |
| 86 | + "Latin": "lat", |
| 87 | + "Latvian": "lav", |
| 88 | + "Lingala": "lin", |
| 89 | + "Lithuanian": "lit", |
| 90 | + "Latgalian": "ltg", |
| 91 | + "Luxembourgish": "ltz", |
| 92 | + "Luba-Katanga": "lub", |
| 93 | + "Ganda": "lug", |
| 94 | + "Malayalam": "mal", |
| 95 | + "Marathi": "mar", |
| 96 | + "Macedonian": "mkd", |
| 97 | + "Malagasy": "mlg", |
| 98 | + "Maltese": "mlt", |
| 99 | + "Mongolian": "mon", |
| 100 | + "Maori": "mri", |
| 101 | + "Burmese": "mya", |
| 102 | + "Min Nan Chinese": "nan", |
| 103 | + "Navajo": "nav", |
| 104 | + "South Ndebele": "nbl", |
| 105 | + "North Ndebele": "nde", |
| 106 | + "Nepali": "nep", |
| 107 | + "Dutch": "nld", |
| 108 | + "Nynorsk": "nno", |
| 109 | + "Bokmål": "nob", |
| 110 | + "Occitan": "oci", |
| 111 | + "Oriya": "ori", |
| 112 | + "Oromo": "orm", |
| 113 | + "Panjabi": "pan", |
| 114 | + "Polish": "pol", |
| 115 | + "Portuguese": "por", |
| 116 | + "Pushto": "pus", |
| 117 | + "Traditional Chinese": "qcn", |
| 118 | + "Romansh": "roh", |
| 119 | + "Romanian": "ron", |
| 120 | + "Rundi": "run", |
| 121 | + "Macedo-Romanian": "rup", |
| 122 | + "Russian": "rus", |
| 123 | + "Sango": "sag", |
| 124 | + "Sanskrit": "san", |
| 125 | + "Sicilian": "scn", |
| 126 | + "Sinhala": "sin", |
| 127 | + "Slovak": "slk", |
| 128 | + "Slovene": "slv", |
| 129 | + "Northern Sami": "sme", |
| 130 | + "Shona": "sna", |
| 131 | + "Somali": "som", |
| 132 | + "Southern Sotho": "sot", |
| 133 | + "Spanish": "spa", |
| 134 | + "Sardinian": "srd", |
| 135 | + "Serbian": "srp", |
| 136 | + "Swati": "ssw", |
| 137 | + "Swahili": "swa", |
| 138 | + "Swedish": "swe", |
| 139 | + "Tamil": "tam", |
| 140 | + "Tatar": "tat", |
| 141 | + "Telugu": "tel", |
| 142 | + "Tajik": "tgk", |
| 143 | + "Tagalog": "tgl", |
| 144 | + "Thai": "tha", |
| 145 | + "Tigrinya": "tir", |
| 146 | + "Tonga": "ton", |
| 147 | + "Tswana": "tsn", |
| 148 | + "Tsonga": "tso", |
| 149 | + "Turkmen": "tuk", |
| 150 | + "Turkish": "tur", |
| 151 | + "Ukrainian": "ukr", |
| 152 | + "Urdu": "urd", |
| 153 | + "Uzbek": "uzb", |
| 154 | + "Venda": "ven", |
| 155 | + "Vietnamese": "vie", |
| 156 | + "Volapük": "vol", |
| 157 | + "Xhosa": "xho", |
| 158 | + "Yiddish": "yid", |
| 159 | + "Yoruba": "yor", |
| 160 | + "Yue Chinese": "yue", |
| 161 | + "Malaysian": "zsm", |
| 162 | + "Zulu": "zul", |
163 | 163 | } |
0 commit comments