Skip to content

Commit abd9d81

Browse files
committed
add more tests, remove php4-specific code, remove code that can never be executed
git-svn-id: http://svn.php.net/repository/pear/packages/Text_LanguageDetect/trunk@322352 c90b9560-bf6c-de11-be94-00142212c4b1
1 parent a8845aa commit abd9d81

File tree

2 files changed

+74
-48
lines changed

2 files changed

+74
-48
lines changed

Text/LanguageDetect.php

Lines changed: 13 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ public function omitLanguages($omit_list, $include_only = false)
343343
// reset the cluster cache if the number of languages changes
344344
// this will then have to be recalculated
345345
if (isset($this->_clusters) && $deleted > 0) {
346-
unset($this->_clusters);
346+
$this->_clusters = null;
347347
}
348348

349349
return $deleted;
@@ -386,7 +386,7 @@ public function languageExists($lang)
386386

387387
} else {
388388
throw new Text_LanguageDetect_Exception(
389-
'Unknown type passed to languageExists()',
389+
'Unsupported parameter type passed to languageExists()',
390390
Text_LanguageDetect_Exception::PARAM_TYPE
391391
);
392392
}
@@ -697,18 +697,9 @@ public function detect($sample, $limit = 0)
697697
if ($encoding != 'ASCII' && $encoding != 'UTF-8'
698698
&& $encoding !== false
699699
) {
700-
if (function_exists('mb_list_encodings')) {
701-
// verify the encoding exists in mb_list_encodings
702-
if (in_array($encoding, mb_list_encodings())) {
703-
$sample = mb_convert_encoding($sample, 'UTF-8', $encoding);
704-
}
705-
706-
// if the previous condition failed:
707-
// somehow we detected an encoding that also we don't support
708-
} else {
709-
// php 4 doesnt have mb_list_encodings()
710-
// so attempt with error suppression
711-
$sample = @mb_convert_encoding($sample, 'UTF-8', $encoding);
700+
// verify the encoding exists in mb_list_encodings
701+
if (in_array($encoding, mb_list_encodings())) {
702+
$sample = mb_convert_encoding($sample, 'UTF-8', $encoding);
712703
}
713704
}
714705
}
@@ -831,8 +822,7 @@ public function detectSimple($sample)
831822

832823
// if top language has the maximum possible score,
833824
// then the top score will have been picked at random
834-
if (!is_array($scores)
835-
|| empty($scores)
825+
if (!is_array($scores) || empty($scores)
836826
|| current($scores) == $this->_max_score
837827
) {
838828
return null;
@@ -872,8 +862,7 @@ public function detectConfidence($sample)
872862

873863
// if most similar language has the max score, it
874864
// will have been picked at random
875-
if (!is_array($scores)
876-
|| empty($scores)
865+
if (!is_array($scores) || empty($scores)
877866
|| current($scores) == $this->_max_score
878867
) {
879868
return null;
@@ -916,20 +905,8 @@ public function detectConfidence($sample)
916905
*/
917906
public function detectUnicodeBlocks($str, $skip_symbols)
918907
{
919-
// input check
920-
if (!is_bool($skip_symbols)) {
921-
throw new Text_LanguageDetect_Exception(
922-
'Second parameter must be boolean',
923-
Text_LanguageDetect_Exception::PARAM_TYPE
924-
);
925-
}
926-
927-
if (!is_string($str)) {
928-
throw new Text_LanguageDetect_Exception(
929-
'First parameter was not a string',
930-
Text_LanguageDetect_Exception::PARAM_TYPE
931-
);
932-
}
908+
$skip_symbols = (bool)$skip_symbols;
909+
$str = (string)$str;
933910

934911
$sample_obj = new Text_LanguageDetect_Parser($str);
935912
$sample_obj->prepareUnicode();
@@ -961,20 +938,12 @@ public function unicodeBlockName($unicode)
961938
// assume it is being passed a utf8 char, so convert it
962939
if (self::utf8strlen($unicode) > 1) {
963940
throw new Text_LanguageDetect_Exception(
964-
'Pass this function only a single char',
941+
'Pass a single char only to this method',
965942
Text_LanguageDetect_Exception::PARAM_TYPE
966943
);
967944
}
968-
969945
$unicode = $this->_utf8char2unicode($unicode);
970946

971-
if ($unicode == -1) {
972-
throw new Text_LanguageDetect_Exception(
973-
'Malformatted char',
974-
Text_LanguageDetect_Exception::INVALID_CHAR
975-
);
976-
}
977-
978947
} elseif (!is_int($unicode)) {
979948
throw new Text_LanguageDetect_Exception(
980949
'Input must be of type string or int.',
@@ -1220,6 +1189,8 @@ function clusterLanguages()
12201189
unset($langs[$old_key]);
12211190
}
12221191

1192+
$result_data = $really_map = array();
1193+
12231194
$i = 0;
12241195
while (count($langs) > 2 && $i++ < 200) {
12251196
$highest_score = -1;
@@ -1521,7 +1492,7 @@ public static function utf8strlen($str)
15211492
*
15221493
* @param string $char a utf8 (possibly multi-byte) char
15231494
*
1524-
* @return int unicode value or -1 if malformatted
1495+
* @return int unicode value
15251496
* @access protected
15261497
* @link http://en.wikipedia.org/wiki/UTF-8
15271498
*/
@@ -1558,10 +1529,6 @@ function _utf8char2unicode($char)
15581529
$x1 = (ord($char{2}) & 0x0000003F) << 6;
15591530
$x2 = (ord($char{3}) & 0x0000003F);
15601531
return ($z1 | $z2 | $x1 | $x2);
1561-
1562-
default:
1563-
// error: malformatted char?
1564-
return -1;
15651532
}
15661533
}
15671534

tests/Text_LanguageDetectTest.php

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ function test_error ()
170170
$this->assertEquals(null, $result);
171171
}
172172

173-
function test_omit ()
173+
function testOmitLanguages()
174174
{
175175
$str = 'This function may return Boolean FALSE, but may also return a non-Boolean value which evaluates to FALSE, such as 0 or "". Please read the section on Booleans for more information. Use the === operator for testing the return value of this function.';
176176

@@ -211,12 +211,30 @@ function test_omit ()
211211
unset($myobj);
212212
}
213213

214-
function test_omitNameMode2()
214+
function testOmitLanguagesNameMode2()
215215
{
216216
$this->x->setNameMode(2);
217217
$this->assertEquals(1, $this->x->omitLanguages('en'));
218218
}
219219

220+
function testOmitLanguagesIncludeString()
221+
{
222+
$this->assertGreaterThan(1, $this->x->omitLanguages('english', true));
223+
$langs = $this->x->getLanguages();
224+
$this->assertEquals(1, count($langs));
225+
$this->assertContains('english', $langs);
226+
}
227+
228+
function testOmitLanguagesClearsClusterCache()
229+
{
230+
$this->x->omitLanguages(array('english', 'german'), true);
231+
$this->assertNull($this->x->_clusters);
232+
$this->x->clusterLanguages();
233+
$this->assertNotNull($this->x->_clusters);
234+
$this->x->omitLanguages('german');
235+
$this->assertNull($this->x->_clusters, 'cluster cache be empty now');
236+
}
237+
220238
function test_perl_compatibility()
221239
{
222240
// if this test fails, then many of the others will
@@ -1396,6 +1414,15 @@ function testLanguageExistsArrayNameMode2()
13961414
$this->assertFalse($this->x->languageExists(array('en', 'doesnotexist')));
13971415
}
13981416

1417+
/**
1418+
* @expectedException Text_LanguageDetect_Exception
1419+
* @expectedExceptionMessage Unsupported parameter type passed to languageExists()
1420+
*/
1421+
function testLanguageExistsUnsupportedType()
1422+
{
1423+
$this->x->languageExists(1.23);
1424+
}
1425+
13991426
function testGetLanguages()
14001427
{
14011428
$langs = $this->x->getLanguages();
@@ -1452,6 +1479,15 @@ function testDetectSimpleNameMode2()
14521479
$this->assertEquals('de', $lang, 'text is german');
14531480
}
14541481

1482+
function testDetectSimpleNoLanguages()
1483+
{
1484+
$this->x->omitLanguages('english', true);
1485+
$this->x->omitLanguages('english', false);
1486+
$this->assertNull(
1487+
$this->x->detectSimple('Das ist ein kleiner Text für euch alle')
1488+
);
1489+
}
1490+
14551491
function testLanguageSimilarity()
14561492
{
14571493
$this->x->setPerlCompatible(true);
@@ -1538,6 +1574,11 @@ function test_compatibility ()
15381574

15391575
}
15401576

1577+
function testDetectConfidenceNoText()
1578+
{
1579+
$this->assertNull($this->x->detectConfidence(''));
1580+
}
1581+
15411582
function test_omit_error ()
15421583
{
15431584
$str = 'On January 29, 1737, Thomas Paine was born in Thetford, England. His father, a corseter, had grand visions for his son, but by the age of 12, Thomas had failed out of school. The young Paine began apprenticing for his father, but again, he failed.';
@@ -1642,6 +1683,24 @@ function test_block_detection()
16421683
}
16431684
}
16441685

1686+
/**
1687+
* @expectedException Text_LanguageDetect_Exception
1688+
* @expectedExceptionMessage Pass a single char only to this method
1689+
*/
1690+
function testUnicodeBlockNameParamString()
1691+
{
1692+
$this->x->unicodeBlockName('foo bar baz');
1693+
}
1694+
1695+
/**
1696+
* @expectedException Text_LanguageDetect_Exception
1697+
* @expectedExceptionMessage Input must be of type string or int
1698+
*/
1699+
function testUnicodeBlockNameUnsupportedParamType()
1700+
{
1701+
$this->x->unicodeBlockName(1.23);
1702+
}
1703+
16451704

16461705
// utility function
16471706
// found in http://www.php.net/manual/en/function.utf8-encode.php#49336

0 commit comments

Comments
 (0)