Skip to content

Commit 670e747

Browse files
committed
commit non-working code (though tests pass, any error will crash) to prevent clockwerx again from breaking and fixing the things I have already fixed locally
git-svn-id: http://svn.php.net/repository/pear/packages/Text_LanguageDetect/trunk@322321 c90b9560-bf6c-de11-be94-00142212c4b1
1 parent 2739ca3 commit 670e747

File tree

3 files changed

+76
-90
lines changed

3 files changed

+76
-90
lines changed

Text/LanguageDetect.php

Lines changed: 72 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
* @link http://langdetect.blogspot.com/
2222
*/
2323

24-
require_once 'PEAR.php';
2524
require_once 'Text/LanguageDetect/Exception.php';
2625
require_once 'Text/LanguageDetect/Parser.php';
2726
require_once 'Text/LanguageDetect/ISO639.php';
@@ -176,15 +175,13 @@ class Text_LanguageDetect
176175
/**
177176
* Constructor
178177
*
179-
* Will attempt to load the language database.
180-
*
181-
* @throws Text_LanguageDetect_Exception
182-
* @todo Avoid work in the constructor
178+
* Will attempt to load the language database. If it fails, you will get
179+
* an exception.
183180
*/
184-
function Text_LanguageDetect()
181+
function __construct()
185182
{
186183
$data = $this->_readdb($this->_db_filename);
187-
184+
$this->_checkTrigram($data['trigram']);
188185
$this->_lang_db = $data['trigram'];
189186

190187
if (isset($data['trigram-unicodemap'])) {
@@ -195,7 +192,6 @@ function Text_LanguageDetect()
195192
if (isset($data['trigram-clusters'])) {
196193
$this->_clusters = $data['trigram-clusters'];
197194
}
198-
199195
}
200196

201197
/**
@@ -240,44 +236,46 @@ function _readdb($fname)
240236

241237
// input check
242238
if (!file_exists($fname)) {
243-
throw new Text_LanguageDetect_Exception('Language database does not exist.');
239+
throw new Text_LanguageDetect_Exception(
240+
'Language database does not exist.',
241+
Text_LanguageDetect_Exception::DB_NOT_FOUND
242+
);
244243
} elseif (!is_readable($fname)) {
245-
throw new Text_LanguageDetect_Exception('Language database is not readable.');
244+
throw new Text_LanguageDetect_Exception(
245+
'Language database is not readable.',
246+
Text_LanguageDetect_Exception::DB_NOT_READABLE
247+
);
246248
}
247249

248-
if (function_exists('file_get_contents')) {
249-
return unserialize(file_get_contents($fname));
250-
} else {
251-
// if you don't have file_get_contents(),
252-
// then this is the next fastest way
253-
ob_start();
254-
readfile($fname);
255-
$contents = ob_get_contents();
256-
ob_end_clean();
257-
return unserialize($contents);
258-
}
250+
return unserialize(file_get_contents($fname));
259251
}
260252

261253

262254
/**
263255
* Checks if this object is ready to detect languages
264256
*
265257
* @access private
266-
* @return bool true if no errors
267-
* @throws Text_LanguageDetect_Exception
258+
*
259+
* @return void
268260
*/
269-
function _setup_ok()
261+
function _checkTrigram($trigram)
270262
{
271-
if (!is_array($this->_lang_db)) {
263+
if (!is_array($trigram)) {
272264
if (ini_get('magic_quotes_runtime')) {
273-
throw new Text_LanguageDetect_Exception('Error loading database. Try turning magic_quotes_runtime off.');
274-
} else {
275-
throw new Text_LanguageDetect_Exception('Language database is not an array.');
265+
throw new Text_LanguageDetect_Exception(
266+
'Error loading database. Try turning magic_quotes_runtime off.',
267+
Text_LanguageDetect_Exception::MAGIC_QUOTES
268+
);
276269
}
277-
} elseif (empty($this->_lang_db)) {
278-
throw new Text_LanguageDetect_Exception('Language database has no elements.');
279-
} else {
280-
return true;
270+
throw new Text_LanguageDetect_Exception(
271+
'Language database is not an array.',
272+
Text_LanguageDetect_Exception::DB_NOT_ARRAY
273+
);
274+
} elseif (empty($trigram)) {
275+
throw new Text_LanguageDetect_Exception(
276+
'Language database has no elements.',
277+
Text_LanguageDetect_Exception::DB_EMPTY
278+
);
281279
}
282280
}
283281

@@ -299,8 +297,6 @@ function _setup_ok()
299297
*/
300298
function omitLanguages($omit_list, $include_only = false)
301299
{
302-
$this->_setup_ok();
303-
304300
$deleted = 0;
305301

306302
$omit_list = $this->_convertFromNameMode($omit_list);
@@ -360,31 +356,21 @@ function omitLanguages($omit_list, $include_only = false)
360356
*/
361357
function getLanguageCount()
362358
{
363-
$this->_setup_ok();
364-
365359
return count($this->_lang_db);
366360
}
367361

368362
/**
369-
* Returns true if a given language exists
370-
*
371-
* If passed an array of names, will return true only if all exist
372-
*
373363
* @access public
374364
* @param mixed $lang language name or array of language names
375365
* @return bool true if language model exists
376-
* @throws Text_LanguageDetect_Exception
377366
*/
378367
function languageExists($lang)
379368
{
380-
$this->_setup_ok();
381-
382369
$lang = $this->_convertFromNameMode($lang);
383370
// string
384371
if (is_string($lang)) {
385372
return isset($this->_lang_db[strtolower($lang)]);
386373

387-
// array
388374
} elseif (is_array($lang)) {
389375
foreach ($lang as $test_lang) {
390376
if (!isset($this->_lang_db[strtolower($test_lang)])) {
@@ -393,23 +379,23 @@ function languageExists($lang)
393379
}
394380
return true;
395381

396-
// other (error)
397382
} else {
398-
throw new Text_LanguageDetect_Exception('Unknown type passed to languageExists()');
383+
throw new Text_LanguageDetect_Exception(
384+
'Unknown type passed to languageExists()',
385+
Text_LanguageDetect_Exception::UNKNOWN_TYPE
386+
);
399387
}
400388
}
401389

402390
/**
403391
* Returns the list of detectable languages
404392
*
405393
* @access public
406-
* @return array the names of the languages known to this object
394+
* @return array the names of the languages known to this object
407395
* @throws Text_LanguageDetect_Exception
408396
*/
409397
function getLanguages()
410398
{
411-
$this->_setup_ok();
412-
413399
return $this->_convertToNameMode(
414400
array_keys($this->_lang_db)
415401
);
@@ -677,8 +663,6 @@ function _normalize_score($score, $base_count = null)
677663
*/
678664
function detect($sample, $limit = 0)
679665
{
680-
$this->_setup_ok();
681-
682666
// input check
683667
if (!Text_LanguageDetect_Parser::validateString($sample)) {
684668
return array();
@@ -739,7 +723,10 @@ function detect($sample, $limit = 0)
739723
if (is_array($blocks)) {
740724
$present_blocks = array_keys($blocks);
741725
} else {
742-
throw new Text_LanguageDetect_Exception('Error during block detection');
726+
throw new Text_LanguageDetect_Exception(
727+
'Error during block detection',
728+
Text_LanguageDetect_Exception::ERR_BLOCK_DETECTION
729+
);
743730
}
744731

745732
$possible_langs = array();
@@ -921,19 +908,25 @@ function detectUnicodeBlocks($str, $skip_symbols)
921908
{
922909
// input check
923910
if (!is_bool($skip_symbols)) {
924-
throw new Text_LanguageDetect_Exception('Second parameter must be boolean');
911+
throw new Text_LanguageDetect_Exception(
912+
'Second parameter must be boolean',
913+
Text_LanguageDetect_Exception::ERR_PARAM_TYPE
914+
);
925915
}
926916

927917
if (!is_string($str)) {
928-
throw new Text_LanguageDetect_Exception('First parameter was not a string');
918+
throw new Text_LanguageDetect_Exception(
919+
'First parameter was not a string',
920+
Text_LanguageDetect_Exception::ERR_PARAM_TYPE
921+
);
929922
}
930923

931924
$sample_obj = new Text_LanguageDetect_Parser($str);
932925
$sample_obj->prepareUnicode();
933926
$sample_obj->prepareTrigram(false);
934927
$sample_obj->setUnicodeSkipSymbols($skip_symbols);
935928
$sample_obj->analyze();
936-
$blocks =& $sample_obj->getUnicodeBlocks();
929+
$blocks = $sample_obj->getUnicodeBlocks();
937930
unset($sample_obj);
938931
return $blocks;
939932
}
@@ -958,21 +951,30 @@ function unicodeBlockName($unicode) {
958951

959952
// input check
960953
if ($this->utf8strlen($unicode) > 1) {
961-
throw new Text_LanguageDetect_Exception('Pass this function only a single char');
954+
throw new Text_LanguageDetect_Exception(
955+
'Pass this function only a single char',
956+
Text_LanguageDetect_Exception::ERR_PARAM_TYPE
957+
);
962958
}
963959

964960
$unicode = $this->_utf8char2unicode($unicode);
965961

966962
if ($unicode == -1) {
967-
throw new Text_LanguageDetect_Exception('Malformatted char');
963+
throw new Text_LanguageDetect_Exception(
964+
'Malformatted char',
965+
Text_LanguageDetect_Exception::ERR_INVALID_CHAR
966+
);
968967
}
969968

970969
// input check
971970
} elseif (!is_int($unicode)) {
972-
throw new Text_LanguageDetect_Exception('Input must be of type string or int.');
971+
throw new Text_LanguageDetect_Exception(
972+
'Input must be of type string or int.',
973+
Text_LanguageDetect_Exception::ERR_PARAM_TYPE
974+
);
973975
}
974976

975-
$blocks =& $this->_read_unicode_block_db();
977+
$blocks = $this->_read_unicode_block_db();
976978

977979
$result = $this->_unicode_block_name($unicode, $blocks);
978980

@@ -1046,9 +1048,9 @@ function _unicode_block_name($unicode, &$blocks, $block_count = -1) {
10461048
*
10471049
* @access protected
10481050
* @return array the database of unicode block definitions
1049-
* @throws Text_LanguageDetect_Exception
1051+
* @throws Text_LanguageDetect_Exception
10501052
*/
1051-
function &_read_unicode_block_db() {
1053+
function _read_unicode_block_db() {
10521054
// since the unicode definitions are always going to be the same,
10531055
// might as well share the memory for the db with all other instances
10541056
// of this class
@@ -1082,8 +1084,6 @@ function &_read_unicode_block_db() {
10821084
*/
10831085
function languageSimilarity($lang1 = null, $lang2 = null)
10841086
{
1085-
$this->_setup_ok();
1086-
10871087
$lang1 = $this->_convertFromNameMode($lang1);
10881088
$lang2 = $this->_convertFromNameMode($lang2);
10891089
if ($lang1 != null) {
@@ -1184,10 +1184,6 @@ function languageSimilarity($lang1 = null, $lang2 = null)
11841184
function clusterLanguages()
11851185
{
11861186
// todo: set the maximum number of clusters
1187-
1188-
// setup check
1189-
$this->_setup_ok();
1190-
11911187
// return cached result, if any
11921188
if (isset($this->_clusters)) {
11931189
return $this->_clusters;
@@ -1201,7 +1197,10 @@ function clusterLanguages()
12011197

12021198
foreach ($langs as $lang) {
12031199
if (!isset($this->_lang_db[$lang])) {
1204-
throw new Text_LanguageDetect_Exception("missing $lang!\n");
1200+
throw new Text_LanguageDetect_Exception(
1201+
"missing $lang!",
1202+
Text_LanguageDetect_Exception::UNKNOWN_LANGUAGE
1203+
);
12051204
}
12061205
}
12071206

@@ -1229,7 +1228,10 @@ function clusterLanguages()
12291228

12301229
if (!$highest_key1) {
12311230
// should not ever happen
1232-
throw new Text_LanguageDetect_Exception("no highest key? (step: $i)");
1231+
throw new Text_LanguageDetect_Exception(
1232+
"no highest key? (step: $i)",
1233+
Text_LanguageDetect_Exception::NO_HIGHEST_KEY
1234+
);
12331235
}
12341236

12351237
if ($highest_score == 0) {
@@ -1365,11 +1367,10 @@ function clusterLanguages()
13651367
* @access public
13661368
* @param string $str input string
13671369
* @return array language scores (only those compared)
1368-
* @throws Text_LanguageDetect_Exception
1370+
* @throws Text_LanguageDetect_Exception
13691371
*/
13701372
function clusteredSearch($str)
13711373
{
1372-
13731374
// input check
13741375
if (!Text_LanguageDetect_Parser::validateString($str)) {
13751376
return array();

Text/LanguageDetect/Parser.php

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,7 @@ function analyze()
220220

221221
// unicode startup
222222
if ($this->_compile_unicode) {
223-
$blocks =& $this->_read_unicode_block_db();
224-
223+
$blocks = $this->_read_unicode_block_db();
225224
$block_count = count($blocks);
226225

227226
$skipped_count = 0;

tests/Text_LanguageDetectTest.php

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,14 @@ class Text_LanguageDetectTest extends PHPUnit_Framework_TestCase {
1515

1616
function setup ()
1717
{
18-
$this->x = new Text_LanguageDetect;
19-
20-
if (!$this->x->_setup_ok($err)) {
21-
$this->markTestSkipped($err->getMessage());
22-
}
18+
$this->x = new Text_LanguageDetect();
2319
}
2420

2521
function tearDown ()
2622
{
2723
unset($this->x);
2824
}
2925

30-
function test_setup ()
31-
{
32-
$err_result = $this->x->_setup_ok($err_obj);
33-
}
34-
3526
function test_splitter ()
3627
{
3728
$str = 'hello';
@@ -1489,13 +1480,9 @@ function test_omit_error ()
14891480
// omit all languages and you should get an error
14901481
$myobj->omitLanguages($myobj->getLanguages());
14911482

1492-
try {
1493-
$result = $myobj->detectSimple($str);
1483+
$result = $myobj->detectSimple($str);
14941484

1495-
$this->fail("Expected an exception for all languages being missing. " . gettype($result));
1496-
} catch (Text_LanguageDetect_Exception $e) {
1497-
$this->assertSame("No languages", $e->getMessage());
1498-
}
1485+
$this->assertNull($result, gettype($result));
14991486
}
15001487

15011488
function test_cyrillic ()
@@ -1581,7 +1568,6 @@ function test_block_detection()
15811568

15821569
foreach ($unicode as $range => $codepoint) {
15831570
$result = $this->x->unicodeBlockName($this->code2utf($codepoint));
1584-
15851571
$this->assertEquals($range, $result, $codepoint);
15861572
}
15871573
}

0 commit comments

Comments
 (0)