@@ -14,6 +14,7 @@ class HtmlDiff
1414 private $ specialCaseOpeningTags = array ();
1515 private $ specialCaseClosingTags = array ();
1616 private $ specialCaseTags = array ('strong ' , 'b ' , 'i ' , 'big ' , 'small ' , 'u ' , 'sub ' , 'sup ' , 'strike ' , 's ' , 'p ' );
17+ private $ specialCaseChars = array ('. ' , ', ' );
1718 private $ groupDiffs = true ;
1819
1920 public function __construct ($ oldText , $ newText , $ encoding = 'UTF-8 ' , $ specialCaseTags = array (), $ groupDiffs = true )
@@ -173,13 +174,18 @@ private function splitInputsToWords()
173174 $ this ->oldWords = $ this ->convertHtmlToListOfWords ( $ this ->explode ( $ this ->oldText ) );
174175 $ this ->newWords = $ this ->convertHtmlToListOfWords ( $ this ->explode ( $ this ->newText ) );
175176 }
177+
/**
 * Tell whether a piece of text still counts as one word when the
 * special-case characters are ignored.
 *
 * Every occurrence of an entry in $this->specialCaseChars is removed from
 * $text, and the remainder is checked with ctype_alnum().
 *
 * NOTE(review): ctype_alnum() is documented to return false for an empty
 * string, so text made up only of special-case characters is presumably
 * rejected - confirm this is the intended behavior.
 *
 * @param string $text Text to examine.
 *
 * @return bool Result of ctype_alnum() on the stripped text.
 */
private function isSingleWord($text)
{
    $withoutSpecialChars = str_replace($this->specialCaseChars, '', $text);

    return ctype_alnum($withoutSpecialChars);
}
176182
177183 private function convertHtmlToListOfWords ($ characterString )
178184 {
179185 $ mode = 'character ' ;
180186 $ current_word = '' ;
181187 $ words = array ();
182- foreach ($ characterString as $ character ) {
188+ foreach ($ characterString as $ i => $ character ) {
183189 switch ($ mode ) {
184190 case 'character ' :
185191 if ( $ this ->isStartOfTag ( $ character ) ) {
@@ -195,7 +201,10 @@ private function convertHtmlToListOfWords($characterString)
195201 $ current_word = $ character ;
196202 $ mode = 'whitespace ' ;
197203 } else {
198- if ( ctype_alnum ( $ character ) && ( strlen ($ current_word ) == 0 || ctype_alnum ( $ current_word ) ) ) {
204+ if (
205+ (ctype_alnum ($ character ) && (strlen ($ current_word ) == 0 || $ this ->isSingleWord ($ current_word ))) ||
206+ (in_array ($ character , $ this ->specialCaseChars ) && isset ($ characterString [$ i +1 ]) && $ this ->isSingleWord ($ characterString [$ i +1 ]))
207+ ) {
199208 $ current_word .= $ character ;
200209 } else {
201210 $ words [] = $ current_word ;
0 commit comments