@@ -9,7 +9,7 @@ class HtmlDiff {
99 private $ newWords = array ();
1010 private $ wordIndices ;
1111 private $ encoding ;
12- private $ specialCaseOpeningTags = array ( "/<strong[^>]+> /i " , "/<b[^>]+> /i " , "/<i[^>]+> /i " , "/<big[^>]+> /i " , "/<small[^>]+> /i " , "/<u[^>]+> /i " , "/<sub[^>]+> /i " , "/<sup[^>]+> /i " , "/<strike[^>]+> /i " , "/<s[^>]+> /i " , '/<p[^>]+> /i ' );
12+ private $ specialCaseOpeningTags = array ( "/<strong[^>]* /i " , "/<b[^>]* /i " , "/<i[^>]* /i " , "/<big[^>]* /i " , "/<small[^>]* /i " , "/<u[^>]* /i " , "/<sub[^>]* /i " , "/<sup[^>]* /i " , "/<strike[^>]* /i " , "/<s[^>]* /i " , '/<p[^>]* /i ' );
1313 private $ specialCaseClosingTags = array ( "</strong> " , "</b> " , "</i> " , "</big> " , "</small> " , "</u> " , "</sub> " , "</sup> " , "</strike> " , "</s> " , '</p> ' );
1414
1515 public function __construct ( $ oldText , $ newText , $ encoding = 'UTF-8 ' ) {
@@ -31,18 +31,27 @@ public function getDifference() {
3131 return $ this ->content ;
3232 }
3333
/**
 * Returns the part of $str found between the first occurrence of $start
 * and the last occurrence of $end that follows it.
 *
 * @param string $str   Text to search.
 * @param string $start Opening delimiter; matching begins after its first occurrence.
 * @param string $end   Closing delimiter; matching stops before its last occurrence.
 * @return string The enclosed text, or '' when either delimiter is empty or not found.
 */
private function getStringBetween( $str, $start, $end ) {
	// explode() rejects an empty separator (ValueError on PHP 8), so treat
	// an empty delimiter the same way as a delimiter that was not found.
	if ( (string)$start === '' || (string)$end === '' ) {
		return '';
	}
	// Limit 2: keep everything after the FIRST $start in one piece.
	$expStr = explode( $start, $str, 2 );
	if ( count( $expStr ) > 1 ) {
		$expStr = explode( $end, $expStr[1] );
		if ( count( $expStr ) > 1 ) {
			// Discard whatever trails the LAST $end; re-joining with $end
			// restores any earlier occurrences that were inside the match.
			array_pop( $expStr );
			return implode( $end, $expStr );
		}
	}
	return '';
}
45+
/**
 * Optionally cleans up an HTML fragment with the Tidy extension and returns
 * the contents of the resulting <body> element.
 *
 * The Tidy pass is currently switched off by the hard-coded "&& false", so
 * the input is returned unchanged.
 *
 * @param string     $html HTML fragment to clean.
 * @param array|null $tags Not used by this method.
 * @return string The cleaned fragment, or $html itself when Tidy is disabled,
 *                fails to parse, or yields no <body> content.
 */
private function purifyHtml( $html, $tags = null ) {
	// Deliberately disabled: drop "&& false" to turn the Tidy pass on.
	if ( class_exists( 'Tidy' ) && false ) {
		$config = array( 'output-xhtml' => true, 'indent' => false );
		$tidy = new tidy;
		if ( !$tidy->parseString( $html, $config, 'utf8' ) ) {
			return $html;
		}
		// getStringBetween() needs both delimiters; without the closing tag
		// the call fails with an ArgumentCountError.
		$body = $this->getStringBetween( (string)$tidy, '<body>', '</body>' );
		// Fall back to the untouched input when nothing could be extracted.
		return $body !== '' ? $body : $html;
	}
	return $html;
}
4756
4857 public function build () {
@@ -238,7 +247,7 @@ private function InsertTag( $tag, $cssClass, &$words ) {
238247 }
239248 }
240249 if ( $ firstOrDefault ) {
241- $ specialCaseTagInjection = " <ins class=' mod'> " ;
250+ $ specialCaseTagInjection = ' <ins class=" mod"> ' ;
242251 if ( $ tag == "del " ) {
243252 unset( $ words [ 0 ] );
244253 }
@@ -257,16 +266,15 @@ private function InsertTag( $tag, $cssClass, &$words ) {
257266 $ this ->content .= $ specialCaseTagInjection . implode ( "" , $ this ->ExtractConsecutiveWords ( $ words , 'tag ' ) );
258267 } else {
259268 $ workTag = $ this ->ExtractConsecutiveWords ( $ words , 'tag ' );
260-
261- if ( $ this ->IsOpeningTag ( $ workTag [ 0 ] ) && !$ this ->IsClosingTag ( $ workTag [ 0 ] ) ) {
262- if ( strpos ( $ workTag [ 0 ], 'class= ' ) ) {
263- $ workTag [ 0 ] = str_replace ( 'class=" ' , 'class="diffmod ' , $ workTag [ 0 ] );
264- $ workTag [ 0 ] = str_replace ( "class=' " , "class='diffmod " , $ workTag [ 0 ] );
265- } else {
266- $ workTag [ 0 ] = str_replace ( "> " , " class='diffmod'> " , $ workTag [ 0 ] );
267- }
268- }
269- $ this ->content .= implode ( "" , $ workTag ) . $ specialCaseTagInjection ;
269+ if ( $ this ->IsOpeningTag ( $ workTag [ 0 ] ) && !$ this ->IsClosingTag ( $ workTag [ 0 ] ) ) {
270+ if ( strpos ( $ workTag [ 0 ], 'class= ' ) ) {
271+ $ workTag [ 0 ] = str_replace ( 'class=" ' , 'class="diffmod ' , $ workTag [ 0 ] );
272+ $ workTag [ 0 ] = str_replace ( "class=' " , 'class="diffmod ' , $ workTag [ 0 ] );
273+ } else {
274+ $ workTag [ 0 ] = str_replace ( "> " , ' class="diffmod"> ' , $ workTag [ 0 ] );
275+ }
276+ }
277+ $ this ->content .= implode ( "" , $ workTag ) . $ specialCaseTagInjection ;
270278 }
271279 }
272280 }
0 commit comments