Skip to content

Commit 385a3df

Browse files
committed
Created TableDiff class
1 parent d285ea6 commit 385a3df

File tree

4 files changed

+280
-93
lines changed

4 files changed

+280
-93
lines changed

demo/index.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
require __DIR__.'/../lib/Caxy/HtmlDiff/AbstractDiff.php';
66
require __DIR__.'/../lib/Caxy/HtmlDiff/HtmlDiff.php';
7+
require __DIR__.'/../lib/Caxy/HtmlDiff/TableDiff.php';
78
require __DIR__.'/../lib/Caxy/HtmlDiff/Match.php';
89
require __DIR__.'/../lib/Caxy/HtmlDiff/Operation.php';
910

lib/Caxy/HtmlDiff/AbstractDiff.php

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ abstract class AbstractDiff
1111
protected $content;
1212
protected $oldText;
1313
protected $newText;
14+
protected $oldWords = array();
15+
protected $newWords = array();
1416
protected $encoding;
1517
protected $specialCaseOpeningTags = array();
1618
protected $specialCaseClosingTags = array();
@@ -173,4 +175,111 @@ protected function purifyHtml($html, $tags = null)
173175

174176
return $html;
175177
}
178+
179+
/**
 * Tokenizes both inputs into word lists.
 *
 * Each of $this->oldText and $this->newText is first split into single
 * characters via explode() and then grouped into words via
 * convertHtmlToListOfWords(); the results are stored in $this->oldWords
 * and $this->newWords respectively.
 */
protected function splitInputsToWords()
{
    // The old text is fully processed before the new text is touched.
    $oldCharacters = $this->explode($this->oldText);
    $this->oldWords = $this->convertHtmlToListOfWords($oldCharacters);

    $newCharacters = $this->explode($this->newText);
    $this->newWords = $this->convertHtmlToListOfWords($newCharacters);
}
184+
185+
/**
 * Tells whether $text counts as word content.
 *
 * Every entry of $this->specialCaseChars is stripped from $text first;
 * what is left is then checked with ctype_alnum().
 *
 * @param string $text Text to examine.
 *
 * @return bool Result of ctype_alnum() on the stripped text.
 */
protected function isPartOfWord($text)
{
    $withoutSpecialChars = str_replace($this->specialCaseChars, '', $text);

    return ctype_alnum($withoutSpecialChars);
}
189+
190+
/**
 * Groups a list of single characters into a list of "words".
 *
 * The characters are consumed by a small state machine with three modes:
 *  - 'character': alphanumeric characters are appended to the current word;
 *    an entry of $this->specialCaseChars is appended as well when the next
 *    character is part of a word. Anything else closes the current word
 *    and starts a new one.
 *  - 'tag': everything from "<" up to and including ">" is collected into
 *    a single word.
 *  - 'whitespace': consecutive whitespace characters are collected into a
 *    single word.
 *
 * Empty strings are never emitted as words.
 *
 * @param array $characterString List of single characters, as produced by explode().
 *
 * @return array List of words (plain words, whitespace runs and whole tags).
 */
protected function convertHtmlToListOfWords($characterString)
{
    $mode = 'character';
    $current_word = '';
    $words = array();

    foreach ($characterString as $i => $character) {
        switch ($mode) {
            case 'character':
                if ($this->isStartOfTag($character)) {
                    if ($current_word !== '') {
                        $words[] = $current_word;
                    }
                    $current_word = '<';
                    $mode = 'tag';
                } elseif (preg_match('/^\s/', $character) > 0) {
                    // Whitespace closes the current word and opens a whitespace run.
                    if ($current_word !== '') {
                        $words[] = $current_word;
                    }
                    $current_word = $character;
                    $mode = 'whitespace';
                } elseif (
                    (ctype_alnum($character) && (strlen($current_word) === 0 || $this->isPartOfWord($current_word))) ||
                    (
                        in_array($character, $this->specialCaseChars, true) &&
                        isset($characterString[$i + 1]) &&
                        $this->isPartOfWord($characterString[$i + 1])
                    )
                ) {
                    $current_word .= $character;
                } else {
                    // Guard against pushing an empty word (e.g. when the very
                    // first character is punctuation or an empty string).
                    if ($current_word !== '') {
                        $words[] = $current_word;
                    }
                    $current_word = $character;
                }
                break;

            case 'tag':
                if ($this->isEndOfTag($character)) {
                    $current_word .= '>';
                    $words[] = $current_word;
                    $current_word = '';

                    // The closing character is not whitespace in practice, so
                    // this resumes in 'whitespace' mode; that mode hands over
                    // to 'character' mode on the first non-whitespace character.
                    $mode = preg_match('/^\s/', $character) ? 'character' : 'whitespace';
                } else {
                    $current_word .= $character;
                }
                break;

            case 'whitespace':
                if ($this->isStartOfTag($character)) {
                    if ($current_word !== '') {
                        $words[] = $current_word;
                    }
                    $current_word = '<';
                    $mode = 'tag';
                } elseif (preg_match('/^\s/', $character)) {
                    $current_word .= $character;
                } else {
                    if ($current_word !== '') {
                        $words[] = $current_word;
                    }
                    $current_word = $character;
                    $mode = 'character';
                }
                break;

            default:
                break;
        }
    }

    // Flush whatever is still being collected when the input ends.
    if ($current_word !== '') {
        $words[] = $current_word;
    }

    return $words;
}
264+
265+
/**
 * Tells whether the given value is the tag-opening character "<".
 *
 * @param string $val Value to compare (loose comparison).
 *
 * @return bool
 */
protected function isStartOfTag($val)
{
    $tagOpener = "<";

    return $tagOpener == $val;
}
269+
270+
/**
 * Tells whether the given value is the tag-closing character ">".
 *
 * @param string $val Value to compare (loose comparison).
 *
 * @return bool
 */
protected function isEndOfTag($val)
{
    $tagCloser = ">";

    return $tagCloser == $val;
}
274+
275+
/**
 * Tells whether the given value contains nothing but whitespace.
 *
 * The previous pattern '[^\s]' was parsed by PCRE as the bracket-delimited
 * expression ^\s, which made the negated result true for values that do
 * NOT start with whitespace. The pattern now carries explicit delimiters
 * so that the character class "any non-whitespace character" is matched.
 *
 * @param string $value Value to examine.
 *
 * @return bool True when $value has no non-whitespace character
 *              (an empty string therefore also yields true).
 */
protected function isWhiteSpace($value)
{
    return !preg_match('/[^\s]/', $value);
}
279+
280+
/**
 * Splits a string into a list of its individual characters.
 *
 * The split is UTF-8 aware, so a multi-byte character stays in one piece.
 * Empty pieces (which preg_split() otherwise produces at both ends of the
 * string) are dropped.
 *
 * @param string $value String to split.
 *
 * @return array List of single characters; empty for an empty string.
 */
protected function explode($value)
{
    // as suggested by @onassar
    $characters = preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);

    if ($characters === false) {
        // preg_split() fails on input that is not valid UTF-8; fall back to a
        // byte-wise split so callers still receive an iterable list and no
        // content is lost.
        return str_split($value);
    }

    return $characters;
}
176285
}

lib/Caxy/HtmlDiff/HtmlDiff.php

Lines changed: 63 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ public function getInsertSpaceInReplace()
3333
public function build()
3434
{
3535
$this->splitInputsToWords();
36+
$this->replaceTables();
3637
$this->indexNewWords();
3738
$operations = $this->operations();
3839
foreach ($operations as $item) {
@@ -57,111 +58,56 @@ protected function indexNewWords()
5758
}
5859
}
5960

60-
protected function splitInputsToWords()
61-
{
62-
$this->oldWords = $this->convertHtmlToListOfWords( $this->explode( $this->oldText ) );
63-
$this->newWords = $this->convertHtmlToListOfWords( $this->explode( $this->newText ) );
64-
}
65-
66-
protected function isPartOfWord($text)
61+
protected function replaceTables()
6762
{
68-
return ctype_alnum(str_replace($this->specialCaseChars, '', $text));
63+
$this->oldTables = $this->createTablePlaceholders($this->oldWords);
64+
$this->newTables = $this->createTablePlaceholders($this->newWords);
6965
}
7066

71-
protected function convertHtmlToListOfWords($characterString)
67+
protected function createTablePlaceholders(&$words)
7268
{
73-
$mode = 'character';
74-
$current_word = '';
75-
$words = array();
76-
foreach ($characterString as $i => $character) {
77-
switch ($mode) {
78-
case 'character':
79-
if ( $this->isStartOfTag( $character ) ) {
80-
if ($current_word != '') {
81-
$words[] = $current_word;
82-
}
83-
$current_word = "<";
84-
$mode = 'tag';
85-
} elseif ( preg_match( "[^\s]", $character ) > 0 ) {
86-
if ($current_word != '') {
87-
$words[] = $current_word;
88-
}
89-
$current_word = $character;
90-
$mode = 'whitespace';
91-
} else {
92-
if (
93-
(ctype_alnum($character) && (strlen($current_word) == 0 || $this->isPartOfWord($current_word))) ||
94-
(in_array($character, $this->specialCaseChars) && isset($characterString[$i+1]) && $this->isPartOfWord($characterString[$i+1]))
95-
) {
96-
$current_word .= $character;
97-
} else {
98-
$words[] = $current_word;
99-
$current_word = $character;
100-
}
69+
$openTables = 0;
70+
$tableIndices = array();
71+
$tableStart = 0;
72+
foreach ($words as $index => $word) {
73+
if ($this->isOpeningTable($word)) {
74+
if ($openTables === 0) {
75+
$tableStart = $index;
10176
}
102-
break;
103-
case 'tag' :
104-
if ( $this->isEndOfTag( $character ) ) {
105-
$current_word .= ">";
106-
$words[] = $current_word;
107-
$current_word = "";
108-
109-
if ( !preg_match('[^\s]', $character ) ) {
110-
$mode = 'whitespace';
111-
} else {
112-
$mode = 'character';
113-
}
114-
} else {
115-
$current_word .= $character;
116-
}
117-
break;
118-
case 'whitespace':
119-
if ( $this->isStartOfTag( $character ) ) {
120-
if ($current_word != '') {
121-
$words[] = $current_word;
122-
}
123-
$current_word = "<";
124-
$mode = 'tag';
125-
} elseif ( preg_match( "[^\s]", $character ) ) {
126-
$current_word .= $character;
127-
} else {
128-
if ($current_word != '') {
129-
$words[] = $current_word;
130-
}
131-
$current_word = $character;
132-
$mode = 'character';
77+
$openTables++;
78+
} elseif ($openTables > 0 && $this->isClosingTable($word)) {
79+
$openTables--;
80+
if ($openTables === 0) {
81+
$tableIndices[] = array('start' => $tableStart, 'length' => $index - $tableStart + 1);
13382
}
134-
break;
135-
default:
136-
break;
13783
}
13884
}
139-
if ($current_word != '') {
140-
$words[] = $current_word;
85+
86+
$tables = array();
87+
$offset = 0;
88+
foreach ($tableIndices as $tableIndex) {
89+
$start = $tableIndex['start'] - $offset;
90+
$tables[$start] = array_splice($words, $start, $tableIndex['length'], '[[REPLACE_TABLE]]');
91+
$offset += $tableIndex['length'] - 1;
14192
}
14293

143-
return $words;
94+
return $tables;
14495
}
14596

146-
protected function isStartOfTag($val)
97+
protected function isOpeningTable($item)
14798
{
148-
return $val == "<";
99+
return preg_match("#<table[^>]+>\\s*#iU", $item);
149100
}
150101

151-
protected function isEndOfTag($val)
102+
protected function isClosingTable($item)
152103
{
153-
return $val == ">";
104+
return preg_match("#</table[^>]*>\\s*#iU", $item);
154105
}
155106

156-
protected function isWhiteSpace($value)
157-
{
158-
return !preg_match( '[^\s]', $value );
159-
}
160-
161-
protected function explode($value)
107+
protected function splitInputsToWords()
162108
{
163-
// as suggested by @onassar
164-
return preg_split( '//u', $value );
109+
$this->oldWords = $this->convertHtmlToListOfWords( $this->explode( $this->oldText ) );
110+
$this->newWords = $this->convertHtmlToListOfWords( $this->explode( $this->newText ) );
165111
}
166112

167113
protected function performOperation($operation)
@@ -207,7 +153,13 @@ protected function processInsertOperation($operation, $cssClass)
207153
$text = array();
208154
foreach ($this->newWords as $pos => $s) {
209155
if ($pos >= $operation->startInNew && $pos < $operation->endInNew) {
210-
$text[] = $s;
156+
if ($s === '[[REPLACE_TABLE]]' && isset($this->newTables[$pos])) {
157+
foreach ($this->newTables[$pos] as $word) {
158+
$text[] = $word;
159+
}
160+
} else {
161+
$text[] = $s;
162+
}
211163
}
212164
}
213165
$this->insertTag( "ins", $cssClass, $text );
@@ -218,18 +170,36 @@ protected function processDeleteOperation($operation, $cssClass)
218170
$text = array();
219171
foreach ($this->oldWords as $pos => $s) {
220172
if ($pos >= $operation->startInOld && $pos < $operation->endInOld) {
221-
$text[] = $s;
173+
if ($s === '[[REPLACE_TABLE]]' && isset($this->oldTables[$pos])) {
174+
foreach ($this->oldTables[$pos] as $word) {
175+
$text[] = $word;
176+
}
177+
} else {
178+
$text[] = $s;
179+
}
222180
}
223181
}
224182
$this->insertTag( "del", $cssClass, $text );
225183
}
226184

185+
/**
 * Diffs two table fragments with a dedicated TableDiff instance.
 *
 * The TableDiff is created with this object's encoding, specialCaseTags
 * and groupDiffs settings.
 *
 * @param string $oldText Old table markup.
 * @param string $newText New table markup.
 *
 * @return mixed Whatever TableDiff::build() returns.
 */
protected function diffTables($oldText, $newText)
{
    $tableDiff = new TableDiff(
        $oldText,
        $newText,
        $this->encoding,
        $this->specialCaseTags,
        $this->groupDiffs
    );

    return $tableDiff->build();
}
190+
227191
protected function processEqualOperation($operation)
228192
{
229193
$result = array();
230194
foreach ($this->newWords as $pos => $s) {
231195
if ($pos >= $operation->startInNew && $pos < $operation->endInNew) {
232-
$result[] = $s;
196+
if ($s === '[[REPLACE_TABLE]]' && isset($this->newTables[$pos])) {
197+
$oldText = implode("", $this->oldTables[$operation->startInOld]);
198+
$newText = implode("", $this->newTables[$pos]);
199+
$result[] = $this->diffTables($oldText, $newText);
200+
} else {
201+
$result[] = $s;
202+
}
233203
}
234204
}
235205
$this->content .= implode( "", $result );
@@ -446,17 +416,17 @@ protected function findMatch($startInOld, $endInOld, $startInNew, $endInNew)
446416
}
447417
$matchLengthAt = $newMatchLengthAt;
448418
}
449-
419+
450420
// Skip match if none found or match consists only of whitespace
451-
if ($bestMatchSize != 0 &&
421+
if ($bestMatchSize != 0 &&
452422
(
453-
!$this->isGroupDiffs() ||
423+
!$this->isGroupDiffs() ||
454424
!preg_match('/^\s+$/', implode('', array_slice($this->oldWords, $bestMatchInOld, $bestMatchSize)))
455425
)
456426
) {
457427
return new Match($bestMatchInOld, $bestMatchInNew, $bestMatchSize);
458428
}
459-
429+
460430
return null;
461431
}
462432
}

0 commit comments

Comments
 (0)