@@ -69,95 +69,85 @@ class Text_LanguageDetect
69
69
* If this value starts with a slash (/) or a dot (.) the value of
70
70
* $this->_data_dir will be ignored
71
71
*
72
- * @var string
73
- * @access private
72
+ * @var string
74
73
*/
75
- var $ _db_filename = 'lang.dat ' ;
74
+ protected $ _db_filename = 'lang.dat ' ;
76
75
77
76
/**
78
77
* The filename that stores the unicode block definitions
79
78
*
80
79
* If this value starts with a slash (/) or a dot (.) the value of
81
80
* $this->_data_dir will be ignored
82
81
*
83
- * @var string
84
- * @access private
82
+ * @var string
85
83
*/
86
- var $ _unicode_db_filename = 'unicode_blocks.dat ' ;
84
+ protected $ _unicode_db_filename = 'unicode_blocks.dat ' ;
87
85
88
86
/**
89
87
* The data directory
90
88
*
91
89
* Should be set by PEAR installer
92
90
*
93
- * @var string
94
- * @access private
91
+ * @var string
95
92
*/
96
- var $ _data_dir = '@data_dir@ ' ;
93
+ protected $ _data_dir = '@data_dir@ ' ;
97
94
98
95
/**
99
96
* The trigram data for comparison
100
97
*
101
98
* Will be loaded on start from $this->_db_filename
102
99
*
103
- * @var array
104
- * @access private
100
+ * @var array
105
101
*/
106
- var $ _lang_db = array ();
102
+ protected $ _lang_db = array ();
107
103
108
104
/**
109
105
* Stores the map of the trigram data to unicode characters
110
106
*
111
- * @access private
112
- * @var array
107
+ * @var array
113
108
*/
114
- var $ _unicode_map ;
109
+ protected $ _unicode_map ;
115
110
116
111
/**
117
112
* The size of the trigram data arrays
118
113
*
119
- * @var int
120
- * @access private
114
+ * @var int
121
115
*/
122
- var $ _threshold = 300 ;
116
+ protected $ _threshold = 300 ;
123
117
124
118
/**
125
119
* The maximum possible score.
126
120
*
127
121
* Needed for score normalization. Different depending on the
128
122
* perl compatibility setting
129
123
*
130
- * @access private
131
- * @var int
132
- * @see setPerlCompatible()
124
+ * @var int
125
+ * @see setPerlCompatible()
133
126
*/
134
- var $ _max_score = 0 ;
127
+ protected $ _max_score = 0 ;
135
128
136
129
/**
137
130
* Whether or not to simulate perl's Language::Guess exactly
138
131
*
139
- * @access private
140
- * @var bool
141
- * @see setPerlCompatible()
132
+ * @var bool
133
+ * @see setPerlCompatible()
142
134
*/
143
- var $ _perl_compatible = false ;
135
+ protected $ _perl_compatible = false ;
144
136
145
137
/**
146
138
* Whether to use the unicode block detection to speed up processing
147
139
*
148
- * @access private
149
- * @var bool
140
+ * @var bool
150
141
*/
151
- var $ _use_unicode_narrowing = true ;
142
+ protected $ _use_unicode_narrowing = true ;
152
143
153
144
/**
154
145
* Stores the result of the clustering operation
155
146
*
156
- * @access private
157
- * @var array
158
- * @see clusterLanguages()
147
+ * @var array
148
+ * @see clusterLanguages()
159
149
*/
160
- var $ _clusters ;
150
+ protected $ _clusters ;
161
151
162
152
/**
163
153
* Which type of "language names" are accepted and returned:
@@ -166,15 +156,15 @@ class Text_LanguageDetect
166
156
* 2 - 2-letter ISO 639-1 code ("en")
167
157
* 3 - 3-letter ISO 639-2 code ("eng")
168
158
*/
169
- var $ _name_mode = 0 ;
159
+ protected $ _name_mode = 0 ;
170
160
171
161
/**
172
162
* Constructor
173
163
*
174
164
* Will attempt to load the language database. If it fails, you will get
175
165
* an exception.
176
166
*/
177
- function __construct ()
167
+ public function __construct ()
178
168
{
179
169
$ data = $ this ->_readdb ($ this ->_db_filename );
180
170
$ this ->_checkTrigram ($ data ['trigram ' ]);
@@ -196,9 +186,8 @@ function __construct()
196
186
* @param string $fname File name to load
197
187
*
198
188
* @return string expected path to the language model database
199
- * @access private
200
189
*/
201
- function _get_data_loc($ fname )
190
+ protected function _get_data_loc($ fname )
202
191
{
203
192
if ($ fname {0 } == '/ ' || $ fname {0 } == '. ' ) {
204
193
// if filename starts with a slash or a dot, assume it's an explicit pathname
@@ -225,9 +214,8 @@ function _get_data_loc($fname)
225
214
*
226
215
* @return array the language model data
227
216
* @throws Text_LanguageDetect_Exception
228
- * @access private
229
217
*/
230
- function _readdb ($ fname )
218
+ protected function _readdb ($ fname )
231
219
{
232
220
// finds the correct data dir
233
221
$ fname = $ this ->_get_data_loc ($ fname );
@@ -255,9 +243,8 @@ function _readdb($fname)
255
243
* @param array $trigram Trigram data from database
256
244
*
257
245
* @return void
258
- * @access private
259
246
*/
260
- function _checkTrigram ($ trigram )
247
+ protected function _checkTrigram ($ trigram )
261
248
{
262
249
if (!is_array ($ trigram )) {
263
250
if (ini_get ('magic_quotes_runtime ' )) {
@@ -349,11 +336,10 @@ public function omitLanguages($omit_list, $include_only = false)
349
336
/**
350
337
* Returns the number of languages that this object can detect
351
338
*
352
- * @access public
353
339
* @return int the number of languages
354
340
* @throws Text_LanguageDetect_Exception
355
341
*/
356
- function getLanguageCount ()
342
+ public function getLanguageCount ()
357
343
{
358
344
return count ($ this ->_lang_db );
359
345
}
@@ -391,11 +377,10 @@ public function languageExists($lang)
391
377
/**
392
378
* Returns the list of detectable languages
393
379
*
394
- * @access public
395
380
* @return array the names of the languages known to this object
396
381
* @throws Text_LanguageDetect_Exception
397
382
*/
398
- function getLanguages ()
383
+ public function getLanguages ()
399
384
{
400
385
return $ this ->_convertToNameMode (
401
386
array_keys ($ this ->_lang_db )
@@ -433,7 +418,7 @@ public function setPerlCompatible($setting = true)
433
418
*
434
419
* @return void
435
420
*/
436
- function setNameMode ($ name_mode )
421
+ public function setNameMode ($ name_mode )
437
422
{
438
423
$ this ->_name_mode = $ name_mode ;
439
424
}
@@ -463,10 +448,9 @@ public function useUnicodeBlocks($setting = true)
463
448
* @param string $text text to convert
464
449
*
465
450
* @return array array of trigram frequencies
466
- * @access private
467
451
* @deprecated Superseded by the Text_LanguageDetect_Parser class
468
452
*/
469
- function _trigram ($ text )
453
+ protected function _trigram ($ text )
470
454
{
471
455
$ s = new Text_LanguageDetect_Parser ($ text );
472
456
$ s ->prepareTrigram ();
@@ -484,9 +468,8 @@ function _trigram($text)
484
468
* @param array $arr array of trigram
485
469
*
486
470
* @return array ranks of trigrams
487
- * @access protected
488
471
*/
489
- function _arr_rank ($ arr )
472
+ protected function _arr_rank ($ arr )
490
473
{
491
474
492
475
// sorts alphabetically first as a standard way of breaking rank ties
@@ -517,9 +500,8 @@ function _arr_rank($arr)
517
500
* @param array $arr the array to sort
518
501
*
519
502
* @return void
520
- * @access private
521
503
*/
522
- function _bub_sort (&$ arr )
504
+ protected function _bub_sort (&$ arr )
523
505
{
524
506
// should do the same as this perl statement:
525
507
// sort { $trigrams{$b} == $trigrams{$a}
@@ -557,9 +539,8 @@ function _bub_sort(&$arr)
557
539
*
558
540
* @return int 1 if $a is greater, -1 if not
559
541
* @see _bub_sort()
560
- * @access private
561
542
*/
562
- function _sort_func ($ a , $ b )
543
+ protected function _sort_func ($ a , $ b )
563
544
{
564
545
// each is actually a key/value pair, so that it can compare using both
565
546
list ($ a_key , $ a_value ) = $ a ;
@@ -597,9 +578,8 @@ function _sort_func($a, $b)
597
578
*
598
579
* @return int the sum of the differences between the ranks of
599
580
* the two trigram sets
600
- * @access private
601
581
*/
602
- function _distance ($ arr1 , $ arr2 )
582
+ protected function _distance ($ arr1 , $ arr2 )
603
583
{
604
584
$ sumdist = 0 ;
605
585
@@ -630,9 +610,8 @@ function _distance($arr1, $arr2)
630
610
*
631
611
* @return float the normalized score
632
612
* @see _distance()
633
- * @access private
634
613
*/
635
- function _normalize_score ($ score , $ base_count = null )
614
+ protected function _normalize_score ($ score , $ base_count = null )
636
615
{
637
616
if ($ base_count === null ) {
638
617
$ base_count = $ this ->_threshold ;
@@ -971,9 +950,8 @@ public function unicodeBlockName($unicode)
971
950
*
972
951
* @return mixed Block name, -1 if it failed
973
952
* @see unicodeBlockName()
974
- * @access protected
975
953
*/
976
- function _unicode_block_name ($ unicode , $ blocks , $ block_count = -1 )
954
+ protected function _unicode_block_name ($ unicode , $ blocks , $ block_count = -1 )
977
955
{
978
956
// for a reference, see
979
957
// http://www.unicode.org/Public/UNIDATA/Blocks.txt
@@ -1024,9 +1002,8 @@ function _unicode_block_name($unicode, $blocks, $block_count = -1)
1024
1002
*
1025
1003
* @return array the database of unicode block definitions
1026
1004
* @throws Text_LanguageDetect_Exception
1027
- * @access protected
1028
1005
*/
1029
- function _read_unicode_block_db ()
1006
+ protected function _read_unicode_block_db ()
1030
1007
{
1031
1008
// since the unicode definitions are always going to be the same,
1032
1009
// might as well share the memory for the db with all other instances
@@ -1145,14 +1122,13 @@ public function languageSimilarity($lang1 = null, $lang2 = null)
1145
1122
* Uses a nearest neighbor technique to generate the maximum possible
1146
1123
* number of dendrograms from the similarity data.
1147
1124
*
1148
- * @access public
1149
1125
* @return array language cluster data
1150
1126
* @throws Text_LanguageDetect_Exception
1151
1127
* @see languageSimilarity()
1152
1128
* @deprecated this function will eventually be removed and placed into
1153
1129
* the model generation class
1154
1130
*/
1155
- function clusterLanguages ()
1131
+ public function clusterLanguages ()
1156
1132
{
1157
1133
// todo: set the maximum number of clusters
1158
1134
// return cached result, if any
@@ -1485,10 +1461,9 @@ public static function utf8strlen($str)
1485
1461
* @param string $char a utf8 (possibly multi-byte) char
1486
1462
*
1487
1463
* @return int unicode value
1488
- * @access protected
1489
1464
* @link http://en.wikipedia.org/wiki/UTF-8
1490
1465
*/
1491
- function _utf8char2unicode ($ char )
1466
+ protected function _utf8char2unicode ($ char )
1492
1467
{
1493
1468
// strlen() here will actually get the binary length of a single char
1494
1469
switch (strlen ($ char )) {
@@ -1536,9 +1511,8 @@ function _utf8char2unicode($char)
1536
1511
* @param bool $special_convert whether to do special conversions
1537
1512
*
1538
1513
* @return char the next (possibly multi-byte) char from $counter
1539
- * @access private
1540
1514
*/
1541
- static function _next_char ($ str , &$ counter , $ special_convert = false )
1515
+ protected static function _next_char($ str , &$ counter , $ special_convert = false )
1542
1516
{
1543
1517
$ char = $ str {$ counter ++};
1544
1518
$ ord = ord ($ char );
@@ -1630,7 +1604,7 @@ static function _next_char($str, &$counter, $special_convert = false)
1630
1604
*
1631
1605
* @return string|array Language name
1632
1606
*/
1633
- function _convertFromNameMode ($ lang , $ convertKey = false )
1607
+ protected function _convertFromNameMode ($ lang , $ convertKey = false )
1634
1608
{
1635
1609
if ($ this ->_name_mode == 0 ) {
1636
1610
return $ lang ;
@@ -1670,7 +1644,7 @@ function _convertFromNameMode($lang, $convertKey = false)
1670
1644
*
1671
1645
* @return string|array Language name
1672
1646
*/
1673
- function _convertToNameMode ($ lang , $ convertKey = false )
1647
+ protected function _convertToNameMode ($ lang , $ convertKey = false )
1674
1648
{
1675
1649
if ($ this ->_name_mode == 0 ) {
1676
1650
return $ lang ;
0 commit comments