43
43
*
44
44
* echo "Supported languages:\n";
45
45
*
46
- * $langs = $l->getLanguages();
47
- * if (PEAR::isError($langs)) {
48
- * die($langs->getMessage());
46
+ * try {
47
+ * $langs = $l->getLanguages();
48
+ * } catch (Text_LanguageDetect_Exception $e) {
49
+ * die($e->getMessage());
49
50
* }
50
51
*
51
52
* sort($langs);
@@ -104,9 +105,6 @@ class Text_LanguageDetect
104
105
*
105
106
* Will be loaded on start from $this->_db_filename
106
107
*
107
- * May be set to a PEAR_Error object if there is an error during its
108
- * initialization
109
- *
110
108
* @var array
111
109
* @access private
112
110
*/
@@ -120,14 +118,6 @@ class Text_LanguageDetect
120
118
*/
121
119
var $ _unicode_map ;
122
120
123
- /**
124
- * stores any errors during setup
125
- *
126
- * @access private
127
- * @var PEAR_Error
128
- */
129
- var $ _setup_error ;
130
-
131
121
/**
132
122
* The size of the trigram data arrays
133
123
*
@@ -186,29 +176,26 @@ class Text_LanguageDetect
186
176
/**
187
177
* Constructor
188
178
*
189
- * Will attempt to load the language database. If it fails, you will get
190
- * a PEAR_Error object returned when you try to use detect()
179
+ * Will attempt to load the language database.
191
180
*
181
+ * @throws Text_LanguageDetect_Exception
182
+ * @todo Avoid work in the constructor
192
183
*/
193
184
function Text_LanguageDetect ()
194
185
{
195
186
$ data = $ this ->_readdb ($ this ->_db_filename );
196
- if (PEAR ::isError ($ data )) {
197
- // if error, save the error message
198
- $ this ->_setup_error = $ data ;
199
-
200
- } else {
201
- $ this ->_lang_db = $ data ['trigram ' ];
187
+
188
+ $ this ->_lang_db = $ data ['trigram ' ];
202
189
203
- if (isset ($ data ['trigram-unicodemap ' ])) {
204
- $ this ->_unicode_map = $ data ['trigram-unicodemap ' ];
205
- }
190
+ if (isset ($ data ['trigram-unicodemap ' ])) {
191
+ $ this ->_unicode_map = $ data ['trigram-unicodemap ' ];
192
+ }
206
193
207
- // Not yet implemented:
208
- if (isset ($ data ['trigram-clusters ' ])) {
209
- $ this ->_clusters = $ data ['trigram-clusters ' ];
210
- }
194
+ // Not yet implemented:
195
+ if (isset ($ data ['trigram-clusters ' ])) {
196
+ $ this ->_clusters = $ data ['trigram-clusters ' ];
211
197
}
198
+
212
199
}
213
200
214
201
/**
@@ -244,7 +231,7 @@ function _get_data_loc($fname)
244
231
* @access private
245
232
* @param string $fname the filename where the data is stored
246
233
* @return array the language model data
247
- * @throws PEAR_Error
234
+ * @throws Text_LanguageDetect_Exception
248
235
*/
249
236
function _readdb ($ fname )
250
237
{
@@ -276,18 +263,12 @@ function _readdb($fname)
276
263
* Checks if this object is ready to detect languages
277
264
*
278
265
* @access private
279
- * @param mixed &$err error object to be returned by reference, if any
280
266
* @return bool true if no errors
267
+ * @throws Text_LanguageDetect_Exception
281
268
*/
282
- function _setup_ok (& $ err )
269
+ function _setup_ok ()
283
270
{
284
- if (PEAR ::isError ($ this ->_setup_error )) {
285
- // if there was an error from when the language database was loaded
286
- // then return that error
287
- $ err = $ this ->_setup_error ;
288
- return false ;
289
-
290
- } elseif (!is_array ($ this ->_lang_db )) {
271
+ if (!is_array ($ this ->_lang_db )) {
291
272
if (ini_get ('magic_quotes_runtime ' )) {
292
273
throw new Text_LanguageDetect_Exception ('Error loading database. Try turning magic_quotes_runtime off. ' );
293
274
} else {
@@ -314,15 +295,11 @@ function _setup_ok(&$err)
314
295
* @param bool $include_only if true will include (rather than
315
296
* exclude) only those in the list
316
297
* @return int number of languages successfully deleted
317
- * @throws PEAR_Error
298
+ * @throws Text_LanguageDetect_Exception
318
299
*/
319
300
function omitLanguages ($ omit_list , $ include_only = false )
320
301
{
321
-
322
- // setup check
323
- if (!$ this ->_setup_ok ($ err )) {
324
- return $ err ;
325
- }
302
+ $ this ->_setup_ok ();
326
303
327
304
$ deleted = 0 ;
328
305
@@ -379,15 +356,13 @@ function omitLanguages($omit_list, $include_only = false)
379
356
*
380
357
* @access public
381
358
* @return int the number of languages
382
- * @throws PEAR_Error
359
+ * @throws Text_LanguageDetect_Exception
383
360
*/
384
361
function getLanguageCount ()
385
362
{
386
- if (!$ this ->_setup_ok ($ err )) {
387
- return $ err ;
388
- } else {
389
- return count ($ this ->_lang_db );
390
- }
363
+ $ this ->_setup_ok ();
364
+
365
+ return count ($ this ->_lang_db );
391
366
}
392
367
393
368
/**
@@ -398,31 +373,29 @@ function getLanguageCount()
398
373
* @access public
399
374
* @param mixed $lang language name or array of language names
400
375
* @return bool true if language model exists
401
- * @throws PEAR_Error
376
+ * @throws Text_LanguageDetect_Exception
402
377
*/
403
378
function languageExists ($ lang )
404
379
{
405
- if (!$ this ->_setup_ok ($ err )) {
406
- return $ err ;
407
- } else {
408
- $ lang = $ this ->_convertFromNameMode ($ lang );
409
- // string
410
- if (is_string ($ lang )) {
411
- return isset ($ this ->_lang_db [strtolower ($ lang )]);
412
-
413
- // array
414
- } elseif (is_array ($ lang )) {
415
- foreach ($ lang as $ test_lang ) {
416
- if (!isset ($ this ->_lang_db [strtolower ($ test_lang )])) {
417
- return false ;
418
- }
419
- }
420
- return true ;
421
-
422
- // other (error)
423
- } else {
424
- throw new Text_LanguageDetect_Exception ('Unknown type passed to languageExists() ' );
380
+ $ this ->_setup_ok ();
381
+
382
+ $ lang = $ this ->_convertFromNameMode ($ lang );
383
+ // string
384
+ if (is_string ($ lang )) {
385
+ return isset ($ this ->_lang_db [strtolower ($ lang )]);
386
+
387
+ // array
388
+ } elseif (is_array ($ lang )) {
389
+ foreach ($ lang as $ test_lang ) {
390
+ if (!isset ($ this ->_lang_db [strtolower ($ test_lang )])) {
391
+ return false ;
392
+ }
425
393
}
394
+ return true ;
395
+
396
+ // other (error)
397
+ } else {
398
+ throw new Text_LanguageDetect_Exception ('Unknown type passed to languageExists() ' );
426
399
}
427
400
}
428
401
@@ -431,17 +404,15 @@ function languageExists($lang)
431
404
*
432
405
* @access public
433
406
* @return array the names of the languages known to this object
434
- * @throws PEAR_Error
407
+ * @throws Text_LanguageDetect_Exception
435
408
*/
436
409
function getLanguages ()
437
410
{
438
- if (!$ this ->_setup_ok ($ err )) {
439
- return $ err ;
440
- } else {
441
- return $ this ->_convertToNameMode (
442
- array_keys ($ this ->_lang_db )
443
- );
444
- }
411
+ $ this ->_setup_ok ();
412
+
413
+ return $ this ->_convertToNameMode (
414
+ array_keys ($ this ->_lang_db )
415
+ );
445
416
}
446
417
447
418
/**
@@ -700,16 +671,13 @@ function _normalize_score($score, $base_count = null)
700
671
* @param int $limit if specified, return an array of the most likely
701
672
* $limit languages and their scores.
702
673
* @return mixed sorted array of language scores, blank array if no
703
- * useable text was found, or PEAR_Error if error
704
- * with the object setup
674
+ * useable text was found
705
675
* @see _distance()
706
- * @throws PEAR_Error
676
+ * @throws Text_LanguageDetect_Exception
707
677
*/
708
678
function detect ($ sample , $ limit = 0 )
709
679
{
710
- if (!$ this ->_setup_ok ($ err )) {
711
- return $ err ;
712
- }
680
+ $ this ->_setup_ok ();
713
681
714
682
// input check
715
683
if (!Text_LanguageDetect_Parser::validateString ($ sample )) {
@@ -855,16 +823,12 @@ function detect($sample, $limit = 0)
855
823
* @return string the name of the most likely language
856
824
* or null if no language is similar
857
825
* @see detect()
858
- * @throws PEAR_Error
826
+ * @throws Text_LanguageDetect_Exception
859
827
*/
860
828
function detectSimple ($ sample )
861
829
{
862
830
$ scores = $ this ->detect ($ sample , 1 );
863
831
864
- if (PEAR ::isError ($ scores )) {
865
- return $ scores ;
866
- }
867
-
868
832
// if top language has the maximum possible score,
869
833
// then the top score will have been picked at random
870
834
if ( !is_array ($ scores )
@@ -901,16 +865,12 @@ function detectSimple($sample)
901
865
* @return array most similar language, score and confidence rating
902
866
* or null if no language is similar
903
867
* @see detect()
904
- * @throws PEAR_Error
868
+ * @throws Text_LanguageDetect_Exception
905
869
*/
906
870
function detectConfidence ($ sample )
907
871
{
908
872
$ scores = $ this ->detect ($ sample , 2 );
909
873
910
- if (PEAR ::isError ($ scores )) {
911
- return $ scores ;
912
- }
913
-
914
874
// if most similar language has the max score, it
915
875
// will have been picked at random
916
876
if ( !is_array ($ scores )
@@ -955,7 +915,7 @@ function detectConfidence($sample)
955
915
* non-printing characters. Includes spaces,
956
916
* newlines and common punctuation characters.
957
917
* @return array
958
- * @throws PEAR_Error
918
+ * @throws Text_LanguageDetect_Exception
959
919
*/
960
920
function detectUnicodeBlocks ($ str , $ skip_symbols )
961
921
{
@@ -990,7 +950,7 @@ function detectUnicodeBlocks($str, $skip_symbols)
990
950
* @access public
991
951
* @param mixed $unicode unicode value or utf8 char
992
952
* @return mixed the block name string or false if not found
993
- * @throws PEAR_Error
953
+ * @throws Text_LanguageDetect_Exception
994
954
*/
995
955
function unicodeBlockName ($ unicode ) {
996
956
if (is_string ($ unicode )) {
@@ -1014,11 +974,6 @@ function unicodeBlockName($unicode) {
1014
974
1015
975
$ blocks =& $ this ->_read_unicode_block_db ();
1016
976
1017
- // there might have been a setup error for the block database
1018
- if (PEAR ::isError ($ blocks )) {
1019
- return $ blocks ;
1020
- }
1021
-
1022
977
$ result = $ this ->_unicode_block_name ($ unicode , $ blocks );
1023
978
1024
979
if ($ result == -1 ) {
@@ -1091,7 +1046,7 @@ function _unicode_block_name($unicode, &$blocks, $block_count = -1) {
1091
1046
*
1092
1047
* @access protected
1093
1048
* @return array the database of unicode block definitions
1094
- * @throws PEAR_Error
1049
+ * @throws Text_LanguageDetect_Exception
1095
1050
*/
1096
1051
function &_read_unicode_block_db () {
1097
1052
// since the unicode definitions are always going to be the same,
@@ -1123,13 +1078,11 @@ function &_read_unicode_block_db() {
1123
1078
* @return array scores of every language compared
1124
1079
* or the score of just the provided languages
1125
1080
* or null if one of the supplied languages does not exist
1126
- * @throws PEAR_Error
1081
+ * @throws Text_LanguageDetect_Exception
1127
1082
*/
1128
1083
function languageSimilarity ($ lang1 = null , $ lang2 = null )
1129
1084
{
1130
- if (!$ this ->_setup_ok ($ err )) {
1131
- return $ err ;
1132
- }
1085
+ $ this ->_setup_ok ();
1133
1086
1134
1087
$ lang1 = $ this ->_convertFromNameMode ($ lang1 );
1135
1088
$ lang2 = $ this ->_convertFromNameMode ($ lang2 );
@@ -1223,7 +1176,7 @@ function languageSimilarity($lang1 = null, $lang2 = null)
1223
1176
*
1224
1177
* @access public
1225
1178
* @return array language cluster data
1226
- * @throws PEAR_Error
1179
+ * @throws Text_LanguageDetect_Exception
1227
1180
* @see languageSimilarity()
1228
1181
* @deprecated this function will eventually be removed and placed into
1229
1182
* the model generation class
@@ -1233,9 +1186,7 @@ function clusterLanguages()
1233
1186
// todo: set the maximum number of clusters
1234
1187
1235
1188
// setup check
1236
- if (!$ this ->_setup_ok ($ err )) {
1237
- return $ err ;
1238
- }
1189
+ $ this ->_setup_ok ();
1239
1190
1240
1191
// return cached result, if any
1241
1192
if (isset ($ this ->_clusters )) {
@@ -1414,7 +1365,7 @@ function clusterLanguages()
1414
1365
* @access public
1415
1366
* @param string $str input string
1416
1367
* @return array language scores (only those compared)
1417
- * @throws PEAR_Error
1368
+ * @throws Text_LanguageDetect_Exception
1418
1369
*/
1419
1370
function clusteredSearch ($ str )
1420
1371
{
@@ -1428,10 +1379,6 @@ function clusteredSearch($str)
1428
1379
// so it's safe to call it every time
1429
1380
$ result = $ this ->clusterLanguages ();
1430
1381
1431
- if (PEAR ::isError ($ result )) {
1432
- return $ result ;
1433
- }
1434
-
1435
1382
$ dendogram_start = $ result ['open_forks ' ];
1436
1383
$ dendogram_data = $ result ['fork_data ' ];
1437
1384
$ dendogram_alias = $ result ['name_map ' ];
0 commit comments