44
55use Caxy \HtmlDiff \AbstractDiff ;
66use Caxy \HtmlDiff \HtmlDiff ;
7+ use Caxy \HtmlDiff \Operation ;
78
89/**
910 * @todo Add getters to TableMatch entity
1516 */
1617class TableDiff extends AbstractDiff
1718{
19+ const STRATEGY_MATCHING = 'matching ' ;
20+ const STRATEGY_RELATIVE = 'relative ' ;
21+
1822 /**
1923 * @var null|Table
2024 */
@@ -55,6 +59,8 @@ class TableDiff extends AbstractDiff
5559 */
5660 protected $ purifier ;
5761
62+ protected $ strategy = self ::STRATEGY_MATCHING ;
63+
5864 public function __construct ($ oldText , $ newText , $ encoding , $ specialCaseTags , $ groupDiffs )
5965 {
6066 parent ::__construct ($ oldText , $ newText , $ encoding , $ specialCaseTags , $ groupDiffs );
@@ -63,6 +69,18 @@ public function __construct($oldText, $newText, $encoding, $specialCaseTags, $gr
6369 $ this ->purifier = new \HTMLPurifier ($ config );
6470 }
6571
/**
 * Select the row-diffing strategy stored on this instance.
 *
 * @param string $strategy Presumably one of the STRATEGY_* constants; the value is
 *                         stored as given, without validation.
 *
 * @return $this The same instance, so calls can be chained.
 */
public function setStrategy($strategy)
{
    $this->strategy = $strategy;

    return $this;
}
78+
/**
 * Report the row-diffing strategy currently stored on this instance.
 *
 * @return string The value last passed to setStrategy(), or the property's initial value.
 */
public function getStrategy()
{
    return $this->strategy;
}
83+
6684 public function build ()
6785 {
6886 $ this ->buildTableDoms ();
@@ -184,11 +202,127 @@ protected function diffTableContent()
184202
185203 addDebugOutput ($ matches , __METHOD__ );
186204
187- $ this ->diffTableRows ($ oldRows , $ newRows , $ oldMatchData );
205+ // new solution for diffing rows
206+ switch ($ this ->strategy ) {
207+ case self ::STRATEGY_MATCHING :
208+ $ this ->diffTableRowsWithMatches ($ oldRows , $ newRows , $ matches );
209+ break ;
210+
211+ case self ::STRATEGY_RELATIVE :
212+ $ this ->diffTableRows ($ oldRows , $ newRows , $ oldMatchData );
213+ break ;
214+
215+ default :
216+ $ this ->diffTableRowsWithMatches ($ oldRows , $ newRows , $ matches );
217+ break ;
218+ }
188219
189220 $ this ->content = $ this ->htmlFromNode ($ this ->diffTable );
190221 }
191222
/**
 * Diff the rows of both tables by walking the row matches in the order given.
 *
 * Each match yields an 'equal' operation; any rows lying between the previous
 * match and the current one are first emitted as an 'insert', 'delete' or
 * 'replace' operation. All operations are then handed to their process*Operation
 * handler in sequence.
 *
 * @param TableRow[] $oldRows
 * @param TableRow[] $newRows
 * @param RowMatch[] $matches Assumed to be ordered by position - TODO confirm against the caller.
 */
protected function diffTableRowsWithMatches($oldRows, $newRows, $matches)
{
    $oldTotal = count($oldRows);
    $newTotal = count($newRows);

    // Zero-length sentinel match past the last row of both tables, so that rows
    // trailing the final real match are still turned into an operation.
    $matches[] = new RowMatch($newTotal, $oldTotal, $newTotal, $oldTotal);

    $operations = array();
    $oldCursor = 0;
    $newCursor = 0;

    // Build the operations.
    foreach ($matches as $match) {
        $oldStart = $match->getStartInOld();
        $newStart = $match->getStartInNew();

        $oldHasGap = $oldCursor !== $oldStart;
        $newHasGap = $newCursor !== $newStart;

        if ($oldHasGap || $newHasGap) {
            if ($oldHasGap && $newHasGap) {
                $gapAction = 'replace';
            } elseif ($oldHasGap) {
                $gapAction = 'delete';
            } else {
                $gapAction = 'insert';
            }

            $operations[] = new Operation($gapAction, $oldCursor, $oldStart, $newCursor, $newStart);
        }

        $oldCursor = $match->getEndInOld();
        $newCursor = $match->getEndInNew();

        $operations[] = new Operation('equal', $oldStart, $oldCursor, $newStart, $newCursor);
    }

    // Row-span bookkeeping shared (by reference) across all handlers.
    $appliedRowSpans = array();

    $handlers = array(
        'equal' => 'processEqualOperation',
        'delete' => 'processDeleteOperation',
        'insert' => 'processInsertOperation',
        'replace' => 'processReplaceOperation',
    );

    // Process the operations; an action without a handler is ignored.
    foreach ($operations as $operation) {
        if (!isset($handlers[$operation->action])) {
            continue;
        }

        $handler = $handlers[$operation->action];
        $this->$handler($operation, $oldRows, $newRows, $appliedRowSpans);
    }
}
288+
/**
 * Append every new-table row covered by the operation, pairing each with no old row.
 *
 * @param Operation  $operation       Supplies startInNew / endInNew, the half-open row range.
 * @param TableRow[] $oldRows         Unused here; kept for a signature parallel to the other handlers.
 * @param TableRow[] $newRows
 * @param array      $appliedRowSpans Passed by reference through to diffAndAppendRows().
 * @param bool       $forceExpansion  Forwarded unchanged to diffAndAppendRows().
 */
protected function processInsertOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans, $forceExpansion = false)
{
    $length = $operation->endInNew - $operation->startInNew;

    // array_slice() reads a negative length as "stop that many elements before the
    // end of the array", which would select unrelated rows; an empty or inverted
    // range must yield nothing instead.
    if ($length <= 0) {
        return;
    }

    foreach (array_slice($newRows, $operation->startInNew, $length) as $row) {
        $this->diffAndAppendRows(null, $row, $appliedRowSpans, $forceExpansion);
    }
}
296+
/**
 * Append every old-table row covered by the operation, pairing each with no new row.
 *
 * @param Operation  $operation       Supplies startInOld / endInOld, the half-open row range.
 * @param TableRow[] $oldRows
 * @param TableRow[] $newRows         Unused here; kept for a signature parallel to the other handlers.
 * @param array      $appliedRowSpans Passed by reference through to diffAndAppendRows().
 * @param bool       $forceExpansion  Forwarded unchanged to diffAndAppendRows().
 */
protected function processDeleteOperation($operation, $oldRows, $newRows, &$appliedRowSpans, $forceExpansion = false)
{
    $length = $operation->endInOld - $operation->startInOld;

    // array_slice() reads a negative length as "stop that many elements before the
    // end of the array", which would select unrelated rows; an empty or inverted
    // range must yield nothing instead.
    if ($length <= 0) {
        return;
    }

    foreach (array_slice($oldRows, $operation->startInOld, $length) as $row) {
        $this->diffAndAppendRows($row, null, $appliedRowSpans, $forceExpansion);
    }
}
304+
/**
 * Diff the matched rows of an 'equal' operation pairwise, by position in the range.
 *
 * A new row with no old row at the same offset is reported through
 * addDebugOutput() and left out of the output.
 *
 * @param Operation  $operation       Supplies the old and new half-open row ranges.
 * @param TableRow[] $oldRows
 * @param TableRow[] $newRows
 * @param array      $appliedRowSpans Passed by reference through to diffAndAppendRows().
 */
protected function processEqualOperation($operation, $oldRows, $newRows, &$appliedRowSpans)
{
    $oldLength = $operation->endInOld - $operation->startInOld;
    $newLength = $operation->endInNew - $operation->startInNew;

    // Re-index both slices from zero so rows pair up by offset within the range.
    $oldSlice = array_values(array_slice($oldRows, $operation->startInOld, $oldLength));
    $newSlice = array_values(array_slice($newRows, $operation->startInNew, $newLength));

    foreach ($newSlice as $offset => $newRow) {
        if (isset($oldSlice[$offset])) {
            $this->diffAndAppendRows($oldSlice[$offset], $newRow, $appliedRowSpans);
        } else {
            addDebugOutput('failed finding matchign row', __METHOD__);
        }
    }
}
319+
/**
 * Handle a 'replace' operation as a delete of the old range followed by an
 * insert of the new range, both with expansion forced.
 *
 * @param Operation  $operation
 * @param TableRow[] $oldRows
 * @param TableRow[] $newRows
 * @param array      $appliedRowSpans Passed by reference through to both handlers.
 */
protected function processReplaceOperation($operation, $oldRows, $newRows, &$appliedRowSpans)
{
    $forceExpansion = true;

    // Order matters for the output: removed rows are appended before added rows.
    $this->processDeleteOperation($operation, $oldRows, $newRows, $appliedRowSpans, $forceExpansion);
    $this->processInsertOperation($operation, $oldRows, $newRows, $appliedRowSpans, $forceExpansion);
}
325+
192326 protected function getRowMatches ($ oldMatchData , $ newMatchData )
193327 {
194328 $ matches = array ();
@@ -240,15 +374,15 @@ protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInN
240374 continue ;
241375 }
242376
243- if ($ newIndex > $ endInNew ) {
377+ if ($ newIndex >= $ endInNew ) {
244378 break ;
245379 }
246380 foreach ($ oldMatches as $ oldIndex => $ percentage ) {
247381 if ($ oldIndex < $ startInOld ) {
248382 continue ;
249383 }
250384
251- if ($ oldIndex > $ endInOld ) {
385+ if ($ oldIndex >= $ endInOld ) {
252386 break ;
253387 }
254388
@@ -257,13 +391,14 @@ protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInN
257391 $ bestMatch = array (
258392 'oldIndex ' => $ oldIndex ,
259393 'newIndex ' => $ newIndex ,
394+ 'percentage ' => $ percentage ,
260395 );
261396 }
262397 }
263398 }
264399
265400 if ($ bestMatch !== null ) {
266- return new RowMatch ($ bestMatch ['newIndex ' ], $ bestMatch ['oldIndex ' ], $ bestMatch ['newIndex ' ] + 1 , $ bestMatch ['oldIndex ' ] + 1 );
401+ return new RowMatch ($ bestMatch ['newIndex ' ], $ bestMatch ['oldIndex ' ], $ bestMatch ['newIndex ' ] + 1 , $ bestMatch ['oldIndex ' ] + 1 , $ bestMatch [ ' percentage ' ] );
267402 }
268403
269404 return null ;
@@ -826,33 +961,27 @@ protected function diffAndAppendRows($oldRow, $newRow, &$appliedRowSpans, $force
826961
/**
 * Score how closely two rows match by comparing cells at the same index.
 *
 * Each cell pair is compared with similar_text(); a pair counts only when its
 * similarity exceeds half of $this->matchThreshold. A first cell scoring above 95
 * is counted with triple weight. The summed score is divided by
 * (min(cell counts) + first-cell weight) * 100.
 *
 * NOTE(review): the denominator adds the full first-cell weight on top of the cell
 * count, so even a perfect match scores below 1 - confirm this is intended.
 * NOTE(review): presumably matchThreshold is on the same 0-100 scale as
 * similar_text() percentages - verify where it is set.
 *
 * @param TableRow $oldRow
 * @param TableRow $newRow
 *
 * @return float|int Ratio of the weighted score to the total count.
 */
protected function getMatchPercentage(TableRow $oldRow, TableRow $newRow)
{
    $firstCellWeight = 3;
    $newCells = $newRow->getCells();
    $sharedCellCount = min(count($newCells), count($oldRow->getCells()));
    $totalCount = ($sharedCellCount + $firstCellWeight) * 100;

    $cellThreshold = $this->matchThreshold * 0.50;
    $score = 0;

    foreach ($newCells as $newIndex => $newCell) {
        $oldCell = $oldRow->getCell($newIndex);
        if (!$oldCell) {
            // No counterpart at this index in the old row: contributes nothing.
            continue;
        }

        $percentage = null;
        similar_text($oldCell->getInnerHtml(), $newCell->getInnerHtml(), $percentage);

        if ($percentage <= $cellThreshold) {
            continue;
        }

        $isStrongFirstCell = $newIndex === 0 && $percentage > 95;
        $score += $isStrongFirstCell ? $percentage * $firstCellWeight : $percentage;
    }

    return ($totalCount > 0) ? ($score / $totalCount) : 0;
}
0 commit comments