Skip to content

Commit 265d4a2

Browse files
committed
part of #19221: setNameMode() is used now!
git-svn-id: http://svn.php.net/repository/pear/packages/Text_LanguageDetect/trunk@322219 c90b9560-bf6c-de11-be94-00142212c4b1
1 parent 2ee13b2 commit 265d4a2

File tree

2 files changed

+345
-12
lines changed

2 files changed

+345
-12
lines changed

Text/LanguageDetect.php

Lines changed: 116 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
require_once 'PEAR.php';
2525
require_once 'Text/LanguageDetect/Parser.php';
26+
require_once 'Text/LanguageDetect/ISO639.php';
2627

2728
/**
2829
* Language detection class
@@ -172,6 +173,15 @@ class Text_LanguageDetect
172173
*/
173174
var $_clusters;
174175

176+
/**
177+
* Which type of "language name" is accepted and returned:
178+
*
179+
* 0 - language name ("english")
180+
* 2 - 2-letter ISO 639-1 code ("en")
181+
* 3 - 3-letter ISO 639-2 code ("eng")
182+
*/
183+
var $_name_mode = 0;
184+
175185
/**
176186
* Constructor
177187
*
@@ -319,6 +329,8 @@ function omitLanguages($omit_list, $include_only = false)
319329

320330
$deleted = 0;
321331

332+
$omit_list = $this->_convertFromNameMode($omit_list);
333+
322334
// deleting the given languages
323335
if (!$include_only) {
324336
if (!is_array($omit_list)) {
@@ -396,6 +408,7 @@ function languageExists($lang)
396408
if (!$this->_setup_ok($err)) {
397409
return $err;
398410
} else {
411+
$lang = $this->_convertFromNameMode($lang);
399412
// string
400413
if (is_string($lang)) {
401414
return isset($this->_lang_db[strtolower($lang)]);
@@ -428,7 +441,9 @@ function getLanguages()
428441
if (!$this->_setup_ok($err)) {
429442
return $err;
430443
} else {
431-
return array_keys($this->_lang_db);
444+
return $this->_convertToNameMode(
445+
array_keys($this->_lang_db)
446+
);
432447
}
433448
}
434449

@@ -452,6 +467,21 @@ function setPerlCompatible($setting = true)
452467

453468
}
454469

470+
/**
471+
* Sets how language names are accepted by and returned from this class.
*
* The value is stored as given, without validation. The name mode
* conversion helpers of this class return their input unchanged in
* mode 0, use the 2-letter conversion in mode 2, and use the 3-letter
* conversion for every other value.
472+
*
473+
* @param integer $name_mode One of the following modes:
474+
* 0 - language name ("english")
475+
* 2 - 2-letter ISO 639-1 code ("en")
476+
* 3 - 3-letter ISO 639-2 code ("eng")
477+
*
478+
* @return void
479+
*/
480+
function setNameMode($name_mode)
481+
{
482+
$this->_name_mode = $name_mode;
483+
}
484+
455485
/**
456486
* Whether to use unicode block ranges in detection
457487
*
@@ -812,9 +842,9 @@ function detect($sample, $limit = 0)
812842
$limited_scores[$key] = $value;
813843
}
814844

815-
return $limited_scores;
845+
return $this->_convertToNameMode($limited_scores, true);
816846
} else {
817-
return $scores;
847+
return $this->_convertToNameMode($scores, true);
818848
}
819849
}
820850

@@ -847,7 +877,7 @@ function detectSimple($sample)
847877
return null;
848878

849879
} else {
850-
return ucfirst(key($scores));
880+
return key($scores);
851881
}
852882
}
853883

@@ -893,7 +923,7 @@ function detectConfidence($sample)
893923
return null;
894924
}
895925

896-
$arr['language'] = ucfirst(key($scores));
926+
$arr['language'] = key($scores);
897927
$arr['similarity'] = current($scores);
898928
if (next($scores) !== false) { // if false then no next element
899929
// the goal is to return a higher value if the distance between
@@ -1104,6 +1134,8 @@ function languageSimilarity($lang1 = null, $lang2 = null)
11041134
return $err;
11051135
}
11061136

1137+
$lang1 = $this->_convertFromNameMode($lang1);
1138+
$lang2 = $this->_convertFromNameMode($lang2);
11071139
if ($lang1 != null) {
11081140
$lang1 = strtolower($lang1);
11091141

@@ -1660,7 +1692,6 @@ function _next_char(&$str, &$counter, $special_convert = false)
16601692

16611693
// tag on next byte
16621694
return $char . $nextchar;
1663-
16641695
} elseif ($ord >> 4 == 14) { // three-byte char
16651696

16661697
// tag on next 2 bytes
@@ -1676,6 +1707,85 @@ function _next_char(&$str, &$counter, $special_convert = false)
16761707
}
16771708
}
16781709

1710+
/**
 * Converts a language input parameter from the configured name mode
 * to the language name that is used internally.
 *
 * Works for strings and arrays. In name mode 0 the input is returned
 * unchanged; mode 2 converts with
 * Text_LanguageDetect_ISO639::code2ToName() and every other mode with
 * Text_LanguageDetect_ISO639::code3ToName().
 *
 * @param string|array $lang       A language description
 *                                 ("english"/"en"/"eng") or an array
 *                                 of such descriptions
 * @param boolean      $convertKey If $lang is an array, setting
 *                                 $convertKey converts the array keys
 *                                 (instead of the values) to the
 *                                 language name.
 *
 * @return string|array Language name(s). Outside of name mode 0, input
 *                      that is neither a string nor iterable (for
 *                      example null) yields an empty array.
 */
function _convertFromNameMode($lang, $convertKey = false)
{
    if ($this->_name_mode == 0) {
        return $lang;
    }

    $method = ($this->_name_mode == 2) ? 'code2ToName' : 'code3ToName';

    if (is_string($lang)) {
        return (string)Text_LanguageDetect_ISO639::$method($lang);
    }

    // Optional parameters that were left at null end up here. Passing
    // them to foreach raises an "invalid argument" warning, so return
    // the empty array the loop would have produced anyway.
    if (!is_array($lang) && !is_object($lang)) {
        return array();
    }

    $newlang = array();
    foreach ($lang as $key => $val) {
        if ($convertKey) {
            // NOTE(review): presumably the ISO639 helper returns null for
            // unknown codes; the cast turns that into '' so such entries
            // share one key - confirm this is acceptable for callers.
            $newkey = (string)Text_LanguageDetect_ISO639::$method($key);
            $newlang[$newkey] = $val;
        } else {
            $newlang[$key] = (string)Text_LanguageDetect_ISO639::$method($val);
        }
    }

    return $newlang;
}
}
1749+
1750+
/**
 * Converts a language output parameter from the language name that is
 * used internally to the configured name mode.
 *
 * Works for strings and arrays. In name mode 0 the input is returned
 * unchanged; mode 2 converts with
 * Text_LanguageDetect_ISO639::nameToCode2() and every other mode with
 * Text_LanguageDetect_ISO639::nameToCode3().
 *
 * @param string|array $lang       An internal language name ("english")
 *                                 or an array of such names
 * @param boolean      $convertKey If $lang is an array, setting
 *                                 $convertKey converts the array keys
 *                                 (instead of the values) to the
 *                                 configured name mode.
 *
 * @return string|array Language description(s) in the configured name
 *                      mode ("english"/"en"/"eng"). Outside of name
 *                      mode 0, input that is neither a string nor
 *                      iterable (for example null) yields an empty array.
 */
function _convertToNameMode($lang, $convertKey = false)
{
    if ($this->_name_mode == 0) {
        return $lang;
    }

    $method = ($this->_name_mode == 2) ? 'nameToCode2' : 'nameToCode3';

    // The (string) casts mirror _convertFromNameMode(), so that a name
    // the ISO639 helper cannot convert still produces a string.
    if (is_string($lang)) {
        return (string)Text_LanguageDetect_ISO639::$method($lang);
    }

    // Non-iterable input would make foreach raise an "invalid argument"
    // warning; return the empty array the loop would have produced anyway.
    if (!is_array($lang) && !is_object($lang)) {
        return array();
    }

    $newlang = array();
    foreach ($lang as $key => $val) {
        if ($convertKey) {
            // NOTE(review): presumably the ISO639 helper returns null for
            // languages without a code; those entries all end up under
            // the '' key and overwrite each other - confirm with callers.
            $newkey = (string)Text_LanguageDetect_ISO639::$method($key);
            $newlang[$newkey] = $val;
        } else {
            $newlang[$key] = (string)Text_LanguageDetect_ISO639::$method($val);
        }
    }

    return $newlang;
}
}
16791789
}
16801790

16811791
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

0 commit comments

Comments
 (0)