22
33namespace Caxy \HtmlDiff ;
44
5-
65class ListDiff extends HtmlDiff
76{
87 protected $ listWords = array ();
@@ -18,23 +17,27 @@ class ListDiff extends HtmlDiff
1817 protected $ list ; // hold the old/new content of the content of the list
1918 protected $ childLists ; // contains the old/new child lists content within this list
2019 protected $ textMatches ; // contains the old/new text strings that match
21- //protected $childListObjects;
22- protected $ listsIndex ;
20+ protected $ listsIndex ; // contains the indexed start positions of each list within word string.
2321
/**
 * Run the full list diff and hand back the generated markup.
 *
 * The preparation steps (word splitting, isolated diff tag replacement and
 * word indexing) reuse the parent's functions; the list-specific work is
 * delegated to diffListContent().
 *
 * @return mixed Whatever has been accumulated in $this->content.
 */
public function build()
{
    // Preparation: parent functions organize the raw inputs.
    $this->splitInputsToWords();
    $this->replaceIsolatedDiffTags();
    $this->indexNewWords();

    // List-specific processing: fills $this->content.
    $this->diffListContent();

    return $this->content;
}
3737
38+ /**
39+ * Calls to the actual custom functions of this class, to diff list content.
40+ */
3841 protected function diffListContent ()
3942 {
4043 /* Format the list we're focusing on.
@@ -46,14 +49,22 @@ protected function diffListContent()
4649 * make sure we use placeholders to replace the nested lists
4750 */
4851 $ this ->replaceListIsolatedDiffTags ();
49- // Set matches of lists.
52+ /* Build a list of matches we can reference when we diff the contents of the lists.
53+ * This is needed so that each NEW list node is matched against the best possible OLD list node.
54+ * It helps us determine whether the list was added, removed, or changed.
55+ */
5056 $ this ->matchAndCompareLists ();
51- // Create child lists objects
52- $ this -> createChildListObjects ();
53- // Diff the child lists
57+ /* Go through the list of matches, and diff the contents of each.
58+ * Any nested lists would be sent to parent's diffList function, which creates a new listDiff class.
59+ */
5460 $ this ->diff ();
5561 }
5662
63+ /*
64+ * This function is used to remove the wrapped ul, ol, or dl characters from this list
65+ * and sets the listType as ul, ol, or dl, so that we can use it later.
66+ * $list is being set here as well, as an array with the old and new version of this list content.
67+ */
5768 protected function formatThisListContent ()
5869 {
5970 foreach ($ this ->oldIsolatedDiffTags as $ key => $ diffTagArray ) {
@@ -76,14 +87,24 @@ protected function formatThisListContent()
7687
/**
 * Prepare everything needed to pair old list nodes with new list nodes.
 *
 * Runs three steps in a fixed order; each later step relies on the state
 * written by the earlier ones.
 */
protected function matchAndCompareLists()
{
    // Step 1: fill $this->childLists with the content of each list node.
    $this->buildChildLists();

    // Step 2: record the starting positions of the list nodes so they can
    // be referred back to later.
    $this->indexLists();

    // Step 3: compare the nodes and store the "new"/"old" id pairs in
    // $this->textMatches (null stands in for a missing counterpart).
    $this->compareChildLists();
}
88109
89110 protected function compareChildLists ()
@@ -94,142 +115,161 @@ protected function compareChildLists()
94115 foreach ($ this ->childLists ['new ' ] as $ thisKey => $ thisList ) {
95116 $ bestMatchPercentages [$ thisKey ] = array ();
96117 foreach ($ this ->childLists ['old ' ] as $ thatKey => $ thatList ) {
118+ // Save the percent amount each new list content compares against the old list content.
97119 similar_text ($ thisList , $ thatList , $ percentage );
98120 $ bestMatchPercentages [$ thisKey ][] = $ percentage ;
99121 }
100122 }
101123
124+ // Sort each array by value, highest percent to lowest percent.
102125 foreach ($ bestMatchPercentages as &$ thisMatch ) {
103126 arsort ($ thisMatch );
104127 }
105- //var_dump($bestMatchPercentages);
106128
107129 // Build matches.
108130 $ matches = array ();
109131 $ taken = array ();
132+ $ takenItems = array ();
110133 $ absoluteMatch = 100 ;
111134 foreach ($ bestMatchPercentages as $ item => $ percentages ) {
112135 $ highestMatch = -1 ;
113136 $ highestMatchKey = -1 ;
137+ $ takeItemKey = -1 ;
114138
115139 foreach ($ percentages as $ key => $ percent ) {
116140 // Check that the key for the percentage is not already taken and the new percentage is higher.
117141 if (!in_array ($ key , $ taken ) && $ percent > $ highestMatch ) {
118142 // If an absolute match, choose this one.
119143 if ($ percent == $ absoluteMatch ) {
144+ //$this->dump("Absolute found");
120145 $ highestMatch = $ percent ;
121146 $ highestMatchKey = $ key ;
147+ $ takenItemKey = $ item ;
122148 break ;
123149 } else {
124150 // Get all the other matches for the same $key
125151 $ columns = array_column ($ bestMatchPercentages , $ key );
126- //$str = "All the other matches for this key:".$key; var_dump($str);
127- //var_dump($columns);
128152 $ thisBestMatches = array_filter (
129153 $ columns ,
130154 function ($ v ) use ($ percent ) {
131155 return $ v > $ percent ;
132156 }
133157 );
134158
135- //$str = "Best Matches Sorted, with lower matches filtered out: ".$percent; var_dump($str);
136159 arsort ($ thisBestMatches );
137- //var_dump($thisBestMatches);
138160
139161 // If no greater amounts, use this one.
140162 if (!count ($ thisBestMatches )) {
141163 $ highestMatch = $ percent ;
142164 $ highestMatchKey = $ key ;
165+ $ takenItemKey = $ item ;
143166 break ;
144167 }
145168
146169 // Loop through, comparing only the items that have not already been added.
147- /* foreach ($thisBestMatches as $k => $v) {
148- if (! in_array($k, $takenItems)) {
170+ foreach ($ thisBestMatches as $ k => $ v ) {
171+ if (in_array ($ k , $ takenItems )) {
149172 $ highestMatch = $ percent ;
150173 $ highestMatchKey = $ key ;
151174 $ takenItemKey = $ item ;
152175 break (2 );
153176 }
154- }*/
177+ }
155178 }
156179 }
157180 }
158181
159182 $ matches [] = array ('new ' => $ item , 'old ' => $ highestMatchKey > -1 ? $ highestMatchKey : null );
160183 if ($ highestMatchKey > -1 ) {
161184 $ taken [] = $ highestMatchKey ;
185+ $ takenItems [] = $ takenItemKey ;
186+ }
187+ }
188+
189+ /* Checking for removed items. Basically, if a list item from the old lists is removed
190+ * it will not be accounted for, and will disappear in the results altogether.
191+ * Loop through all the old lists, any that has not been added, will be added as:
192+ * array( new => null, old => oldItemId )
193+ */
194+ $ matchColumns = array_column ($ matches , 'old ' );
195+ foreach ($ this ->childLists ['old ' ] as $ thisKey => $ thisList ) {
196+ if (!in_array ($ thisKey , $ matchColumns )) {
197+ $ matches [] = array ('new ' => null , 'old ' => $ thisKey );
162198 }
163199 }
164200
165201 // Save the matches.
166202 $ this ->textMatches = $ matches ;
167- $ this ->dump ($ matches );
168203 }
169204
/**
 * Populate $this->childLists with the list node contents of both versions.
 *
 * The 'old' entry is filled first, then the 'new' entry, each from the
 * matching entry of $this->list via getListsContent().
 */
protected function buildChildLists()
{
    foreach (array('old', 'new') as $version) {
        $this->childLists[$version] = $this->getListsContent($this->list[$version]);
    }
}
175213
176- protected function createChildListObjects ()
177- {
178- /*$this->childListObjects = array();
179- foreach ($this->textMatches as $match) {
180- $object = new ListNode($match['old'], $match['new']);
181- $this->childListObjects[] = $object;
182- }*/
183- }
184-
/**
 * Generate the diffed markup for this list and store it in $this->content.
 *
 * Every entry of $this->textMatches yields one <li> element holding the diff
 * of the matched old and new node contents. A side whose id is null is
 * treated as an empty string.
 */
protected function diff()
{
    // Opening wrapper tag derived from listType (e.g. <ol>).
    $this->content = $this->addListTypeWrapper();

    foreach ($this->textMatches as $matches) {
        $oldText = '';
        if ($matches['old'] !== null) {
            $oldText = $this->childLists['old'][$matches['old']];
        }

        $newText = '';
        if ($matches['new'] !== null) {
            $newText = $this->childLists['new'][$matches['new']];
        }

        $this->content .= "<li>";
        // Diff the two texts, then resolve any placeholders (nested lists)
        // that appear in the result.
        $this->content .= $this->processPlaceholders($this->diffElements($oldText, $newText), $matches);
        $this->content .= "</li>";
    }

    // Closing wrapper tag derived from listType (e.g. </ol>).
    $this->content .= $this->addListTypeWrapper(false);
}
209238
/**
 * Replace nested-list placeholders in a diffed list node with their own diff.
 *
 * The text is split on single spaces. Each word that is found in
 * $this->isolatedDiffTags is treated as a placeholder and replaced by the
 * result of diffList() on the corresponding old and new nested list content;
 * all other words are passed through unchanged.
 *
 * @param string $text    Diffed content of one list node.
 * @param array  $matches Match entry for this node ('old' and 'new' keys), forwarded to getListContent().
 *
 * @return string The text with every placeholder replaced, re-joined with single spaces.
 */
protected function processPlaceholders($text, array $matches)
{
    // Nested list content for both versions, looked up by placeholder position.
    $contentVault = array(
        'old' => $this->getListContent('old', $matches),
        'new' => $this->getListContent('new', $matches),
    );

    $returnText = array();
    $count = 0;
    foreach (explode(' ', $text) as $word) {
        $content = $word;
        if (in_array($word, $this->isolatedDiffTags)) {
            // A nested list may exist on one side only (for example when the
            // surrounding node was added or removed). Fall back to an empty
            // word list so implode() never receives an undefined entry.
            $oldWords = isset($contentVault['old'][$count]) ? $contentVault['old'][$count] : array();
            $newWords = isset($contentVault['new'][$count]) ? $contentVault['new'][$count] : array();

            $content = $this->diffList(implode('', $oldWords), implode('', $newWords), true);
            $count++;
        }

        $returnText[] = $content;
    }

    return implode(' ', $returnText);
}
232269
270+ /**
271+ * Grab the list content using the listsIndex array.
272+ */
233273 protected function getListContent ($ indexKey = 'new ' , array $ matches )
234274 {
235275 $ bucket = array ();
@@ -243,6 +283,12 @@ protected function getListContent($indexKey = 'new', array $matches)
243283 return $ bucket ;
244284 }
245285
286+ /**
287+ * indexLists
288+ *
289+ * Index the list, starting positions, so that we can refer back to it later.
290+ * This is used to see where one list node starts and another ends.
291+ */
246292 protected function indexLists ()
247293 {
248294 $ this ->listsIndex = array ();
@@ -259,22 +305,26 @@ protected function indexLists()
259305 }
260306 }
261307
/**
 * Build the opening or closing tag for this list from $this->listType.
 *
 * @param bool $opening True (default) for the opening tag, false for the closing tag.
 *
 * @return string The tag, e.g. "<ul>" or "</ul>".
 */
protected function addListTypeWrapper($opening = true)
{
    $slash = $opening ? '' : "/";

    return sprintf('<%s%s>', $slash, $this->listType);
}
266315
267- protected function dump ($ content )
268- {
269- var_dump ($ content );
270- }
271-
/**
 * Swap nested lists for placeholders in both versions of this list.
 *
 * The result of createIsolatedDiffTagPlaceholders() for each version of
 * $this->list is stored under the same key in $this->listIsolatedDiffTags,
 * 'old' first and then 'new'.
 */
public function replaceListIsolatedDiffTags()
{
    foreach (array('old', 'new') as $version) {
        $this->listIsolatedDiffTags[$version] = $this->createIsolatedDiffTagPlaceholders($this->list[$version]);
    }
}
277324
325+ /**
326+ * Grab the contents of a list node.
327+ */
278328 protected function getListsContent (array $ contentArray , $ stripTags = true )
279329 {
280330 preg_match_all ('/<li>(.*?)<\/li>/s ' , implode ('' , $ contentArray ), $ matches );
0 commit comments