23
23
24
24
require_once 'PEAR.php ' ;
25
25
require_once 'Text/LanguageDetect/Parser.php ' ;
26
+ require_once 'Text/LanguageDetect/ISO639.php ' ;
26
27
27
28
/**
28
29
* Language detection class
@@ -172,6 +173,15 @@ class Text_LanguageDetect
172
173
*/
173
174
var $ _clusters ;
174
175
176
+ /**
177
+ * Which type of "language names" are accepted and returned:
178
+ *
179
+ * 0 - language name ("english")
180
+ * 2 - 2-letter ISO 639-1 code ("en")
181
+ * 3 - 3-letter ISO 639-2 code ("eng")
182
+ */
183
+ var $ _name_mode = 0 ;
184
+
175
185
/**
176
186
* Constructor
177
187
*
@@ -319,6 +329,8 @@ function omitLanguages($omit_list, $include_only = false)
319
329
320
330
$ deleted = 0 ;
321
331
332
+ $ omit_list = $ this ->_convertFromNameMode ($ omit_list );
333
+
322
334
// deleting the given languages
323
335
if (!$ include_only ) {
324
336
if (!is_array ($ omit_list )) {
@@ -396,6 +408,7 @@ function languageExists($lang)
396
408
if (!$ this ->_setup_ok ($ err )) {
397
409
return $ err ;
398
410
} else {
411
+ $ lang = $ this ->_convertFromNameMode ($ lang );
399
412
// string
400
413
if (is_string ($ lang )) {
401
414
return isset ($ this ->_lang_db [strtolower ($ lang )]);
@@ -428,7 +441,9 @@ function getLanguages()
428
441
if (!$ this ->_setup_ok ($ err )) {
429
442
return $ err ;
430
443
} else {
431
- return array_keys ($ this ->_lang_db );
444
+ return $ this ->_convertToNameMode (
445
+ array_keys ($ this ->_lang_db )
446
+ );
432
447
}
433
448
}
434
449
@@ -452,6 +467,21 @@ function setPerlCompatible($setting = true)
452
467
453
468
}
454
469
470
/**
 * Selects the notation in which language identifiers are exchanged
 * with callers, both for accepted parameters and for returned values.
 *
 * The value is stored as given; no validation is performed here.
 *
 * @param integer $name_mode Notation to use:
 *                           0 - language name ("english")
 *                           2 - 2-letter ISO 639-1 code ("en")
 *                           3 - 3-letter ISO 639-2 code ("eng")
 *
 * @return void
 */
function setNameMode($name_mode)
{
    $this->_name_mode = $name_mode;
}
484
+
455
485
/**
456
486
* Whether to use unicode block ranges in detection
457
487
*
@@ -812,9 +842,9 @@ function detect($sample, $limit = 0)
812
842
$ limited_scores [$ key ] = $ value ;
813
843
}
814
844
815
- return $ limited_scores ;
845
+ return $ this -> _convertToNameMode ( $ limited_scores, true ) ;
816
846
} else {
817
- return $ scores ;
847
+ return $ this -> _convertToNameMode ( $ scores, true ) ;
818
848
}
819
849
}
820
850
@@ -847,7 +877,7 @@ function detectSimple($sample)
847
877
return null ;
848
878
849
879
} else {
850
- return ucfirst ( key ($ scores) );
880
+ return key ($ scores );
851
881
}
852
882
}
853
883
@@ -893,7 +923,7 @@ function detectConfidence($sample)
893
923
return null ;
894
924
}
895
925
896
- $ arr ['language ' ] = ucfirst ( key ($ scores) );
926
+ $ arr ['language ' ] = key ($ scores );
897
927
$ arr ['similarity ' ] = current ($ scores );
898
928
if (next ($ scores ) !== false ) { // if false then no next element
899
929
// the goal is to return a higher value if the distance between
@@ -1104,6 +1134,8 @@ function languageSimilarity($lang1 = null, $lang2 = null)
1104
1134
return $ err ;
1105
1135
}
1106
1136
1137
+ $ lang1 = $ this ->_convertFromNameMode ($ lang1 );
1138
+ $ lang2 = $ this ->_convertFromNameMode ($ lang2 );
1107
1139
if ($ lang1 != null ) {
1108
1140
$ lang1 = strtolower ($ lang1 );
1109
1141
@@ -1660,7 +1692,6 @@ function _next_char(&$str, &$counter, $special_convert = false)
1660
1692
1661
1693
// tag on next byte
1662
1694
return $ char . $ nextchar ;
1663
-
1664
1695
} elseif ($ ord >> 4 == 14 ) { // three-byte char
1665
1696
1666
1697
// tag on next 2 bytes
@@ -1676,6 +1707,85 @@ function _next_char(&$str, &$counter, $special_convert = false)
1676
1707
}
1677
1708
}
1678
1709
1710
/**
 * Converts a language input parameter from the configured name mode
 * to the language name that is used internally.
 *
 * In name mode 0 the input is returned untouched. Otherwise strings are
 * converted directly, and for arrays either the values or (when
 * $convertKey is true) the keys are converted. Any other value, such as
 * null, is returned unchanged.
 *
 * Every lookup result is cast to string, so a lookup that yields null
 * ends up as an empty string.
 *
 * @param string|array $lang       A language description in the configured
 *                                 mode ("english"/"en"/"eng"), or an array
 *                                 of them
 * @param boolean      $convertKey If $lang is an array, setting $convertKey
 *                                 converts the keys instead of the values
 *
 * @return string|array Language name(s) as used internally
 */
function _convertFromNameMode($lang, $convertKey = false)
{
    if ($this->_name_mode == 0) {
        return $lang;
    }

    if ($this->_name_mode == 2) {
        $method = 'code2ToName';
    } else {
        $method = 'code3ToName';
    }

    if (is_string($lang)) {
        return (string)Text_LanguageDetect_ISO639::$method($lang);
    }

    // Callers such as languageSimilarity() hand over null for omitted
    // arguments; iterating over a non-array would raise a PHP warning
    // and silently turn the value into an empty array.
    if (!is_array($lang)) {
        return $lang;
    }

    $newlang = array();
    foreach ($lang as $key => $val) {
        if ($convertKey) {
            $newkey = (string)Text_LanguageDetect_ISO639::$method($key);
            $newlang[$newkey] = $val;
        } else {
            $newlang[$key] = (string)Text_LanguageDetect_ISO639::$method($val);
        }
    }
    return $newlang;
}
1749
+
1750
/**
 * Converts a language output parameter from the language name that is
 * used internally to the configured name mode.
 *
 * In name mode 0 the input is returned untouched. Otherwise strings are
 * converted directly, and for arrays either the values or (when
 * $convertKey is true) the keys are converted. Any other value is
 * returned unchanged.
 *
 * Lookup results are passed through as returned by
 * Text_LanguageDetect_ISO639; they are not cast.
 *
 * @param string|array $lang       An internal language name ("english"),
 *                                 or an array of them
 * @param boolean      $convertKey If $lang is an array, setting $convertKey
 *                                 converts the keys instead of the values
 *
 * @return string|array Language identifier(s) in the configured mode
 */
function _convertToNameMode($lang, $convertKey = false)
{
    if ($this->_name_mode == 0) {
        return $lang;
    }

    if ($this->_name_mode == 2) {
        $method = 'nameToCode2';
    } else {
        $method = 'nameToCode3';
    }

    if (is_string($lang)) {
        return Text_LanguageDetect_ISO639::$method($lang);
    }

    // Anything that is neither a string nor an array cannot be converted;
    // pass it through instead of letting foreach raise a PHP warning.
    if (!is_array($lang)) {
        return $lang;
    }

    $newlang = array();
    foreach ($lang as $key => $val) {
        if ($convertKey) {
            $newkey = Text_LanguageDetect_ISO639::$method($key);
            $newlang[$newkey] = $val;
        } else {
            $newlang[$key] = Text_LanguageDetect_ISO639::$method($val);
        }
    }
    return $newlang;
}
1679
1789
}
1680
1790
1681
1791
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
0 commit comments