Skip to content

Commit 2dec71b

Browse files
committed
begin implementing #19221: Return ISO 639-1 or ISO 639-2 language codes
git-svn-id: http://svn.php.net/repository/pear/packages/Text_LanguageDetect/trunk@322177 c90b9560-bf6c-de11-be94-00142212c4b1
1 parent ee7e920 commit 2dec71b

File tree

1 file changed

+155
-0
lines changed

1 file changed

+155
-0
lines changed

Text/LanguageDetect/ISO639.php

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
<?php
2+
/**
3+
* Part of Text_LanguageDetect
4+
*
5+
* PHP version 5
6+
*
7+
* @category Text
8+
* @package Text_LanguageDetect
9+
* @author Christian Weiske <cweiske@php.net>
10+
* @copyright 2011 Christian Weiske <cweiske@php.net>
11+
* @license http://www.debian.org/misc/bsd.license BSD
12+
* @version SVN: $Id$
13+
* @link http://pear.php.net/package/Text_LanguageDetect/
14+
*/
15+
16+
/**
17+
* Provides a mapping between the languages from lang.dat and the
18+
* ISO 639-1 and ISO-639-2 codes.
19+
*
20+
* @category Text
21+
* @package Text_LanguageDetect
22+
* @author Christian Weiske <cweiske@php.net>
23+
* @copyright 2011 Christian Weiske <cweiske@php.net>
24+
* @license http://www.debian.org/misc/bsd.license BSD
25+
* @link http://pear.php.net/package/Text_LanguageDetect/
26+
*/
27+
class Text_LanguageDetect_ISO639
{
    /**
     * Lookup table: language name (lower-case, as used in the language
     * database) => ISO 639-1 two-letter code.
     *
     * Every language name is present as a key. The value is NULL when the
     * language has no two-letter code (cebuano, hawaiian, pidgin), so use
     * array_key_exists() rather than isset() to test whether a name is known.
     *
     * Shares exactly the same keys, in the same order, as $name2code3.
     *
     * @var array
     */
    public static $name2code2 = array(
        'albanian'   => 'sq',
        'arabic'     => 'ar',
        'azeri'      => 'az',
        'bengali'    => 'bn',
        'bulgarian'  => 'bg',
        'cebuano'    => null,
        'croatian'   => 'hr',
        'czech'      => 'cs',
        'danish'     => 'da',
        'dutch'      => 'nl',
        'english'    => 'en',
        'estonian'   => 'et',
        'farsi'      => 'fa',
        'finnish'    => 'fi',
        'french'     => 'fr',
        'german'     => 'de',
        'hausa'      => 'ha',
        'hawaiian'   => null,
        'hindi'      => 'hi',
        'hungarian'  => 'hu',
        'icelandic'  => 'is',
        'indonesian' => 'id',
        'italian'    => 'it',
        'kazakh'     => 'kk',
        'kyrgyz'     => 'ky',
        'latin'      => 'la',
        'latvian'    => 'lv',
        'lithuanian' => 'lt',
        'macedonian' => 'mk',
        'mongolian'  => 'mn',
        'nepali'     => 'ne',
        'norwegian'  => 'no',
        'pashto'     => 'ps',
        'pidgin'     => null,
        'polish'     => 'pl',
        'portuguese' => 'pt',
        'romanian'   => 'ro',
        'russian'    => 'ru',
        'serbian'    => 'sr',
        'slovak'     => 'sk',
        'slovene'    => 'sl',
        'somali'     => 'so',
        'spanish'    => 'es',
        'swahili'    => 'sw',
        'swedish'    => 'sv',
        'tagalog'    => 'tl',
        'turkish'    => 'tr',
        'ukrainian'  => 'uk',
        'urdu'       => 'ur',
        'uzbek'      => 'uz',
        'vietnamese' => 'vi',
        'welsh'      => 'cy',
    );

    /**
     * Lookup table: language name (lower-case, as used in the language
     * database) => ISO 639-2 three-letter code.
     *
     * Every language name has a three-letter code; no value is NULL.
     * Where ISO 639-2 defines both a bibliographic and a terminologic
     * code, the terminologic one is used (e.g. 'deu', 'fra', 'ces', 'nld').
     *
     * Shares exactly the same keys, in the same order, as $name2code2.
     *
     * @var array
     */
    public static $name2code3 = array(
        'albanian'   => 'sqi',
        'arabic'     => 'ara',
        'azeri'      => 'aze',
        'bengali'    => 'ben',
        'bulgarian'  => 'bul',
        'cebuano'    => 'ceb',
        'croatian'   => 'hrv',
        'czech'      => 'ces',
        'danish'     => 'dan',
        'dutch'      => 'nld',
        'english'    => 'eng',
        'estonian'   => 'est',
        'farsi'      => 'fas',
        'finnish'    => 'fin',
        'french'     => 'fra',
        'german'     => 'deu',
        'hausa'      => 'hau',
        'hawaiian'   => 'haw',
        'hindi'      => 'hin',
        'hungarian'  => 'hun',
        'icelandic'  => 'isl',
        'indonesian' => 'ind',
        'italian'    => 'ita',
        'kazakh'     => 'kaz',
        'kyrgyz'     => 'kir',
        'latin'      => 'lat',
        'latvian'    => 'lav',
        'lithuanian' => 'lit',
        'macedonian' => 'mkd',
        'mongolian'  => 'mon',
        'nepali'     => 'nep',
        'norwegian'  => 'nor',
        'pashto'     => 'pus',
        'pidgin'     => 'crp',
        'polish'     => 'pol',
        'portuguese' => 'por',
        'romanian'   => 'ron',
        'russian'    => 'rus',
        'serbian'    => 'srp',
        'slovak'     => 'slk',
        'slovene'    => 'slv',
        'somali'     => 'som',
        'spanish'    => 'spa',
        'swahili'    => 'swa',
        'swedish'    => 'swe',
        'tagalog'    => 'tgl',
        'turkish'    => 'tur',
        'ukrainian'  => 'ukr',
        'urdu'       => 'urd',
        'uzbek'      => 'uzb',
        'vietnamese' => 'vie',
        'welsh'      => 'cym',
    );
}
154+
155+
?>

0 commit comments

Comments
 (0)