Skip to content

Commit 71071c5

Browse files
committed
Completed the ListDiff diffing functionality.
1 parent a600082 commit 71071c5

File tree

5 files changed

+102
-70
lines changed

5 files changed

+102
-70
lines changed

demo/demo.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ demo.controller('diffCtrl', ['$scope', '$http', '$sce', '$timeout', function ($s
3838
$scope.loading = true;
3939
$http.post('index.php', { oldText: $scope.oldText, newText: $scope.newText })
4040
.success(function (data) {
41-
//alert(JSON.stringify(data));
4241
$scope.diff = data.diff;
4342
$scope.loading = false;
4443
});

demo/index.php

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
'Caxy/HtmlDiff/HtmlDiff',
1111
'Caxy/HtmlDiff/Match',
1212
'Caxy/HtmlDiff/Operation',
13-
'Caxy/HtmlDiff/ListDiff',
13+
'Caxy/HtmlDiff/ListDiff'
1414
);
1515

1616
foreach ($classes as $class) {
@@ -21,9 +21,8 @@
2121

2222
if ($input) {
2323
$data = json_decode($input, true);
24-
$diff = new HtmlDiff($_POST['oldText'], $_POST['newText'], 'UTF-8', array());
24+
$diff = new HtmlDiff($data['oldText'], $data['newText'], 'UTF-8', array());
2525
$diff->build();
26-
echo $diff->getDifference();die;
2726

2827
header('Content-Type: application/json');
2928
echo json_encode(array('diff' => $diff->getDifference()));

lib/Caxy/HtmlDiff/HtmlDiff.php

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,7 @@ protected function diffElements($oldText, $newText)
211211
protected function diffList($oldText, $newText)
212212
{
213213
$diff = new ListDiff($oldText, $newText, $this->encoding, $this->isolatedDiffTags, $this->groupDiffs);
214-
$diff->build();
215-
return $diff->getDifference();
214+
return $diff->build();
216215
}
217216

218217
protected function processEqualOperation($operation)

lib/Caxy/HtmlDiff/ListDiff.php

Lines changed: 99 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
namespace Caxy\HtmlDiff;
44

5-
65
class ListDiff extends HtmlDiff
76
{
87
protected $listWords = array();
@@ -18,23 +17,27 @@ class ListDiff extends HtmlDiff
1817
protected $list; // hold the old/new content of the content of the list
1918
protected $childLists; // contains the old/new child lists content within this list
2019
protected $textMatches; // contains the old/new text strings that match
21-
//protected $childListObjects;
22-
protected $listsIndex;
20+
protected $listsIndex; // contains the indexed start positions of each list within word string.
2321

22+
/**
23+
* We're using the same functions as the parent in build() to get us to the point of
24+
* manipulating the data within this class.
25+
*/
2426
public function build()
2527
{
26-
ini_set('xdebug.var_display_max_depth', 5);
27-
ini_set('xdebug.var_display_max_children', 256);
28-
ini_set('xdebug.var_display_max_data', 1024);
29-
// get content from li's
28+
// Use the parent functions to get the data we need organized.
3029
$this->splitInputsToWords();
3130
$this->replaceIsolatedDiffTags();
3231
$this->indexNewWords();
32+
// Now use the custom functions in this class to use the data and generate our diff.
3333
$this->diffListContent();
3434

3535
return $this->content;
3636
}
3737

38+
/**
39+
* Calls to the actual custom functions of this class, to diff list content.
40+
*/
3841
protected function diffListContent()
3942
{
4043
/* Format the list we're focusing on.
@@ -46,14 +49,22 @@ protected function diffListContent()
4649
* make sure we use placeholders to replace the nested lists
4750
*/
4851
$this->replaceListIsolatedDiffTags();
49-
// Set matches of lists.
52+
/* Build a list of matches we can reference when we diff the contents of the lists.
53+
* This is needed so that each NEW list node is matched against the best possible OLD list node.
54+
* It helps us determine whether the list was added, removed, or changed.
55+
*/
5056
$this->matchAndCompareLists();
51-
// Create child lists objects
52-
$this->createChildListObjects();
53-
// Diff the child lists
57+
/* Go through the list of matches, and diff the contents of each.
58+
* Any nested lists would be sent to parent's diffList function, which creates a new listDiff class.
59+
*/
5460
$this->diff();
5561
}
5662

63+
/*
64+
* This function is used to remove the wrapped ul, ol, or dl characters from this list
65+
* and sets the listType as ul, ol, or dl, so that we can use it later.
66+
* $list is being set here as well, as an array with the old and new version of this list content.
67+
*/
5768
protected function formatThisListContent()
5869
{
5970
foreach ($this->oldIsolatedDiffTags as $key => $diffTagArray) {
@@ -76,14 +87,24 @@ protected function formatThisListContent()
7687

7788
protected function matchAndCompareLists()
7889
{
79-
// Build childLists array of old/new content of lists.
90+
/**
91+
* Build an array (childLists) to hold the contents of the list nodes within this list.
92+
* This only holds the content of each list node.
93+
*/
8094
$this->buildChildLists();
8195

96+
/**
97+
* Index the list, starting positions, so that we can refer back to it later.
98+
* This is used to see where one list node starts and another ends.
99+
*/
82100
$this->indexLists();
83-
// Compare the lists, saving total matches to textMatches array.
101+
102+
/**
103+
* Compare the lists and build $textMatches array with the matches.
104+
* Each match is an array of "new" and "old" keys, with the id of the list it matches to.
105+
* Whenever there is no match (in cases where a new list item was added or removed), null is used instead of the id.
106+
*/
84107
$this->compareChildLists();
85-
// Create the child list objects from textMatches array
86-
$this->createChildListObjects();
87108
}
88109

89110
protected function compareChildLists()
@@ -94,142 +115,161 @@ protected function compareChildLists()
94115
foreach ($this->childLists['new'] as $thisKey => $thisList) {
95116
$bestMatchPercentages[$thisKey] = array();
96117
foreach ($this->childLists['old'] as $thatKey => $thatList) {
118+
// Save the similarity percentage of each new list's content compared against each old list's content.
97119
similar_text($thisList, $thatList, $percentage);
98120
$bestMatchPercentages[$thisKey][] = $percentage;
99121
}
100122
}
101123

124+
// Sort each array by value, highest percent to lowest percent.
102125
foreach ($bestMatchPercentages as &$thisMatch) {
103126
arsort($thisMatch);
104127
}
105-
//var_dump($bestMatchPercentages);
106128

107129
// Build matches.
108130
$matches = array();
109131
$taken = array();
132+
$takenItems = array();
110133
$absoluteMatch = 100;
111134
foreach ($bestMatchPercentages as $item => $percentages) {
112135
$highestMatch = -1;
113136
$highestMatchKey = -1;
137+
$takeItemKey = -1;
114138

115139
foreach ($percentages as $key => $percent) {
116140
// Check that the key for the percentage is not already taken and the new percentage is higher.
117141
if (!in_array($key, $taken) && $percent > $highestMatch) {
118142
// If an absolute match, choose this one.
119143
if ($percent == $absoluteMatch) {
144+
//$this->dump("Absolute found");
120145
$highestMatch = $percent;
121146
$highestMatchKey = $key;
147+
$takenItemKey = $item;
122148
break;
123149
} else {
124150
// Get all the other matches for the same $key
125151
$columns = array_column($bestMatchPercentages, $key);
126-
//$str = "All the other matches for this key:".$key; var_dump($str);
127-
//var_dump($columns);
128152
$thisBestMatches = array_filter(
129153
$columns,
130154
function ($v) use ($percent) {
131155
return $v > $percent;
132156
}
133157
);
134158

135-
//$str = "Best Matches Sorted, with lower matches filtered out: ".$percent; var_dump($str);
136159
arsort($thisBestMatches);
137-
//var_dump($thisBestMatches);
138160

139161
// If no greater amounts, use this one.
140162
if (!count($thisBestMatches)) {
141163
$highestMatch = $percent;
142164
$highestMatchKey = $key;
165+
$takenItemKey = $item;
143166
break;
144167
}
145168

146169
// Loop through, comparing only the items that have not already been added.
147-
/*foreach ($thisBestMatches as $k => $v) {
148-
if (!in_array($k, $takenItems)) {
170+
foreach ($thisBestMatches as $k => $v) {
171+
if (in_array($k, $takenItems)) {
149172
$highestMatch = $percent;
150173
$highestMatchKey = $key;
151174
$takenItemKey = $item;
152175
break(2);
153176
}
154-
}*/
177+
}
155178
}
156179
}
157180
}
158181

159182
$matches[] = array('new' => $item, 'old' => $highestMatchKey > -1 ? $highestMatchKey : null);
160183
if ($highestMatchKey > -1) {
161184
$taken[] = $highestMatchKey;
185+
$takenItems[] = $takenItemKey;
186+
}
187+
}
188+
189+
/* Checking for removed items. Basically, if a list item from the old lists is removed
190+
* it will not be accounted for, and will disappear in the results altogether.
191+
* Loop through all the old lists; any that have not been added will be added as:
192+
* array( new => null, old => oldItemId )
193+
*/
194+
$matchColumns = array_column($matches, 'old');
195+
foreach ($this->childLists['old'] as $thisKey => $thisList) {
196+
if (!in_array($thisKey, $matchColumns)) {
197+
$matches[] = array('new' => null, 'old' => $thisKey);
162198
}
163199
}
164200

165201
// Save the matches.
166202
$this->textMatches = $matches;
167-
$this->dump($matches);
168203
}
169204

205+
/**
206+
* Build multidimensional array holding the contents of each list node, old and new.
207+
*/
170208
protected function buildChildLists()
171209
{
172210
$this->childLists['old'] = $this->getListsContent($this->list['old']);
173211
$this->childLists['new'] = $this->getListsContent($this->list['new']);
174212
}
175213

176-
protected function createChildListObjects()
177-
{
178-
/*$this->childListObjects = array();
179-
foreach ($this->textMatches as $match) {
180-
$object = new ListNode($match['old'], $match['new']);
181-
$this->childListObjects[] = $object;
182-
}*/
183-
}
184-
214+
/**
215+
* Diff the actual contents of the lists against their matched counterpart.
216+
* Build the content of the class.
217+
*/
185218
protected function diff()
186219
{
220+
// Add the opening parent node from listType. So if ol, <ol>, etc.
187221
$this->content = $this->addListTypeWrapper();
188222

189223
foreach ($this->textMatches as $key => $matches) {
190224
$oldText = $matches['old'] !== null ? $this->childLists['old'][$matches['old']] : '';
191225
$newText = $matches['new'] !== null ? $this->childLists['new'][$matches['new']] : '';
192-
$this->dump("OLD TEXT: ". $oldText);
193-
$this->dump("NEW TEXT: ".$newText);
194226

227+
// Add the opening and closing list item tags around the diffed content.
195228
$this->content .= "<li>";
196-
if ($newText && !$oldText) {
197-
$this->content .= $newText;
198-
} elseif ($oldText && !$newText) {
199-
$this->content .= "THIS RIGHT HERE";
200-
} else {
201-
$thisDiff = $this->processPlaceholders($this->diffElements($oldText, $newText), $matches);
202-
$this->content .= $thisDiff;
203-
}
229+
// Process any placeholders, if they exist.
230+
// Placeholders would be nested lists (a nested ol, ul, dl for example).
231+
$this->content .= $this->processPlaceholders($this->diffElements($oldText, $newText), $matches);
204232
$this->content .= "</li>";
205233
}
206234

235+
// Add the closing parent node from listType. So if ol, </ol>, etc.
207236
$this->content .= $this->addListTypeWrapper(false);
208237
}
209238

239+
/**
240+
* Return the contents of each list node.
241+
* Process any placeholders for nested lists.
242+
*/
210243
protected function processPlaceholders($text, array $matches)
211244
{
245+
// Prepare return
212246
$returnText = array();
247+
// Save the contents of all list nodes, new and old.
213248
$contentVault = array(
214249
'old' => $this->getListContent('old', $matches),
215250
'new' => $this->getListContent('new', $matches)
216251
);
217252

218253
$count = 0;
254+
// Loop through the text checking for placeholders. If a nested list is found, create a new ListDiff object for it.
219255
foreach (explode(' ', $text) as $word) {
220256
$content = $word;
221257
if (in_array($word, $this->isolatedDiffTags)) {
222258
$oldText = implode('', $contentVault['old'][$count]);
223259
$newText = implode('', $contentVault['new'][$count]);
224-
$content = $this->diffList($oldText, $newText);
260+
$content = $this->diffList($oldText, $newText, true);
225261
$count++;
226262
}
227263

228264
$returnText[] = $content;
229265
}
266+
// Return the result.
230267
return implode(' ', $returnText);
231268
}
232269

270+
/**
271+
* Grab the list content using the listsIndex array.
272+
*/
233273
protected function getListContent($indexKey = 'new', array $matches)
234274
{
235275
$bucket = array();
@@ -243,6 +283,12 @@ protected function getListContent($indexKey = 'new', array $matches)
243283
return $bucket;
244284
}
245285

286+
/**
287+
* indexLists
288+
*
289+
* Index the list, starting positions, so that we can refer back to it later.
290+
* This is used to see where one list node starts and another ends.
291+
*/
246292
protected function indexLists()
247293
{
248294
$this->listsIndex = array();
@@ -259,22 +305,26 @@ protected function indexLists()
259305
}
260306
}
261307

308+
/**
309+
* Adds the opening or closing list html element, based on listType.
310+
*/
262311
protected function addListTypeWrapper($opening = true)
263312
{
264313
return "<" . (!$opening ? "/" : '') . $this->listType . ">";
265314
}
266315

267-
protected function dump($content)
268-
{
269-
var_dump($content);
270-
}
271-
316+
/**
317+
* Replace nested list with placeholders.
318+
*/
272319
public function replaceListIsolatedDiffTags()
273320
{
274321
$this->listIsolatedDiffTags['old'] = $this->createIsolatedDiffTagPlaceholders($this->list['old']);
275322
$this->listIsolatedDiffTags['new'] = $this->createIsolatedDiffTagPlaceholders($this->list['new']);
276323
}
277324

325+
/**
326+
* Grab the contents of a list node.
327+
*/
278328
protected function getListsContent(array $contentArray, $stripTags = true)
279329
{
280330
preg_match_all('/<li>(.*?)<\/li>/s', implode('', $contentArray), $matches);

lib/Caxy/HtmlDiff/ListNode.php

Lines changed: 0 additions & 15 deletions
This file was deleted.

0 commit comments

Comments
 (0)