Skip to content

Commit 7f9447e

Browse files
author
Nicholas Pisarro
committed
Fix for bug #13385: mb_detect_encoding() returns FALSE when detection fails, and this was not handled properly. Also precluded the theoretical possibility that an encoding could be detected which PHP does not support.
git-svn-id: http://svn.php.net/repository/pear/packages/Text_LanguageDetect/trunk@261817 c90b9560-bf6c-de11-be94-00142212c4b1
1 parent e73a61d commit 7f9447e

File tree

1 file changed

+22
-2
lines changed

1 file changed

+22
-2
lines changed

Text/LanguageDetect.php

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -694,9 +694,29 @@ function detect($sample, $limit = 0)
694694
if (function_exists('mb_detect_encoding')
695695
&& function_exists('mb_convert_encoding')) {
696696

697+
// mb_detect_encoding isn't very reliable, to say the least
698+
// detection should still work with a sufficient sample of ascii characters
697699
$encoding = mb_detect_encoding($sample);
698-
if ($encoding != 'ASCII' && $encoding != 'UTF-8') {
699-
$sample = mb_convert_encoding($sample, 'UTF-8', $encoding);
700+
701+
// mb_detect_encoding() will return FALSE if detection fails
702+
// don't attempt conversion if that's the case
703+
if ($encoding != 'ASCII' && $encoding != 'UTF-8' && $encoding !== false) {
704+
705+
if (function_exists('mb_list_encodings')) {
706+
707+
// verify the encoding exists in mb_list_encodings
708+
if (in_array($encoding, mb_list_encodings())) {
709+
$sample = mb_convert_encoding($sample, 'UTF-8', $encoding);
710+
}
711+
712+
// if the previous condition failed:
713+
// somehow we detected an encoding that we also don't support
714+
715+
} else {
716+
// PHP 4 doesn't have mb_list_encodings()
717+
// so attempt with error suppression
718+
$sample = @mb_convert_encoding($sample, 'UTF-8', $encoding);
719+
}
700720
}
701721
}
702722

0 commit comments

Comments
 (0)