Skip to content

Commit 36e102a

Browse files
committed
Start on enhanced row matching logic
1 parent 7a3a367 commit 36e102a

File tree

2 files changed

+235
-10
lines changed

2 files changed

+235
-10
lines changed
Lines changed: 124 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,124 @@
1+
<?php
2+
3+
namespace Caxy\HtmlDiff\Table;
4+
5+
/**
 * Holds the start and end positions of a matched run of rows in both the
 * old table and the new table.
 *
 * Every setter returns the instance itself so calls can be chained.
 */
class RowMatch
{
    /**
     * Start position of the match in the new table.
     *
     * @var int
     */
    protected $startInNew;

    /**
     * Start position of the match in the old table.
     *
     * @var int
     */
    protected $startInOld;

    /**
     * End position of the match in the new table.
     *
     * @var int
     */
    protected $endInNew;

    /**
     * End position of the match in the old table.
     *
     * @var int
     */
    protected $endInOld;

    /**
     * Creates a match from its four positions; each one defaults to 0.
     *
     * @param int $startInNew
     * @param int $startInOld
     * @param int $endInNew
     * @param int $endInOld
     */
    public function __construct($startInNew = 0, $startInOld = 0, $endInNew = 0, $endInOld = 0)
    {
        $this->startInNew = $startInNew;
        $this->endInNew = $endInNew;
        $this->startInOld = $startInOld;
        $this->endInOld = $endInOld;
    }

    /**
     * @return int The stored start position in the new table.
     */
    public function getStartInNew()
    {
        return $this->startInNew;
    }

    /**
     * Replaces the start position in the new table.
     *
     * @param int $startInNew
     *
     * @return RowMatch This instance, for chaining.
     */
    public function setStartInNew($startInNew)
    {
        $this->startInNew = $startInNew;

        return $this;
    }

    /**
     * @return int The stored start position in the old table.
     */
    public function getStartInOld()
    {
        return $this->startInOld;
    }

    /**
     * Replaces the start position in the old table.
     *
     * @param int $startInOld
     *
     * @return RowMatch This instance, for chaining.
     */
    public function setStartInOld($startInOld)
    {
        $this->startInOld = $startInOld;

        return $this;
    }

    /**
     * @return int The stored end position in the new table.
     */
    public function getEndInNew()
    {
        return $this->endInNew;
    }

    /**
     * Replaces the end position in the new table.
     *
     * @param int $endInNew
     *
     * @return RowMatch This instance, for chaining.
     */
    public function setEndInNew($endInNew)
    {
        $this->endInNew = $endInNew;

        return $this;
    }

    /**
     * @return int The stored end position in the old table.
     */
    public function getEndInOld()
    {
        return $this->endInOld;
    }

    /**
     * Replaces the end position in the old table.
     *
     * @param int $endInOld
     *
     * @return RowMatch This instance, for chaining.
     */
    public function setEndInOld($endInOld)
    {
        $this->endInOld = $endInOld;

        return $this;
    }
}

lib/Caxy/HtmlDiff/Table/TableDiff.php

Lines changed: 111 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -153,8 +153,6 @@ protected function diffTableContent()
153153
$oldRows = $this->oldTable->getRows();
154154
$newRows = $this->newTable->getRows();
155155

156-
$appliedRowSpans = array();
157-
158156
$oldMatchData = array();
159157
$newMatchData = array();
160158

@@ -173,23 +171,117 @@ protected function diffTableContent()
173171
$newText = $newRow->getInnerHtml();
174172

175173
// similar_text
176-
$percentage = null;
177-
similar_text($oldText, $newText, $percentage);
174+
// $percentage = null;
175+
// similar_text($oldText, $newText, $percentage);
178176
$percentage = $this->getMatchPercentage($oldRow, $newRow);
179177

180178
$oldMatchData[$oldIndex][$newIndex] = $percentage;
181179
$newMatchData[$newIndex][$oldIndex] = $percentage;
182180
}
183181
}
184182

183+
$matches = $this->getRowMatches($oldMatchData, $newMatchData);
184+
185+
addDebugOutput($matches, __METHOD__);
186+
187+
$this->diffTableRows($oldRows, $newRows, $oldMatchData);
188+
189+
$this->content = $this->htmlFromNode($this->diffTable);
190+
}
191+
192+
/**
 * Collects row matches over the whole range of the supplied match data.
 *
 * The search window starts at index 0 on both sides and ends at the element
 * count of each match-data array; the actual search is delegated to
 * findRowMatches(), which fills the result list by reference.
 *
 * @param array $oldMatchData Match data for the old rows; only its element count is used here.
 * @param array $newMatchData Match data for the new rows; passed on to findRowMatches().
 *
 * @return array The matches appended by findRowMatches() (empty when none are found).
 */
protected function getRowMatches($oldMatchData, $newMatchData)
{
    $collected = array();

    $this->findRowMatches(
        $newMatchData,
        0,
        count($oldMatchData),
        0,
        count($newMatchData),
        $collected
    );

    return $collected;
}
205+
206+
/**
 * Recursively gathers row matches inside the given old/new index window.
 *
 * The best match in the window is obtained from findRowMatch(). When one is
 * found, the window before it is searched first, then the match itself is
 * appended, then the window after it is searched. Matches therefore land in
 * $matches in that before / pivot / after order.
 *
 * @param array $newMatchData Match data handed through to findRowMatch().
 * @param int   $startInOld   Window start on the old side.
 * @param int   $endInOld     Window end on the old side.
 * @param int   $startInNew   Window start on the new side.
 * @param int   $endInNew     Window end on the new side.
 * @param array $matches      Result list, extended in place (passed by reference).
 */
protected function findRowMatches($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew, &$matches)
{
    $pivot = $this->findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew);

    // Nothing matched in this window: leave $matches untouched.
    if ($pivot === null) {
        return;
    }

    // Only descend into the leading window when it is non-empty on both sides.
    $hasLeadingWindow = $startInOld < $pivot->getStartInOld()
        && $startInNew < $pivot->getStartInNew();

    if ($hasLeadingWindow) {
        $this->findRowMatches(
            $newMatchData,
            $startInOld,
            $pivot->getStartInOld(),
            $startInNew,
            $pivot->getStartInNew(),
            $matches
        );
    }

    $matches[] = $pivot;

    // Likewise, only descend into the trailing window when both sides have room left.
    $hasTrailingWindow = $pivot->getEndInOld() < $endInOld
        && $pivot->getEndInNew() < $endInNew;

    if ($hasTrailingWindow) {
        $this->findRowMatches(
            $newMatchData,
            $pivot->getEndInOld(),
            $endInOld,
            $pivot->getEndInNew(),
            $endInNew,
            $matches
        );
    }
}
232+
233+
/**
 * Finds the highest-scoring old/new row pair inside the given index window.
 *
 * Start bounds are inclusive and end bounds are exclusive. Callers pass
 * count() of the match data, or the start index of a previously found match,
 * as the end bound, and the returned RowMatch stores index + 1 as its end.
 * Treating the end bound as inclusive would let the recursion in
 * findRowMatches() rediscover the same pair in the window before it and
 * recurse with identical arguments without ever terminating.
 *
 * Only pairs with a percentage strictly greater than 0 are considered; on a
 * tie the first pair encountered wins.
 *
 * @param array $newMatchData Match percentages indexed as [newIndex][oldIndex].
 * @param int   $startInOld   First old index to consider (inclusive).
 * @param int   $endInOld     Old index at which to stop (exclusive).
 * @param int   $startInNew   First new index to consider (inclusive).
 * @param int   $endInNew     New index at which to stop (exclusive).
 *
 * @return RowMatch|null A single-row match for the best pair, or null when no
 *                       pair in the window scores above 0.
 */
protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew)
{
    $bestMatch = null;
    $bestPercentage = 0;

    foreach ($newMatchData as $newIndex => $oldMatches) {
        if ($newIndex < $startInNew) {
            continue;
        }

        // Exclusive end bound. The break relies on the keys being iterated in
        // ascending order, as the original implementation already did.
        if ($newIndex >= $endInNew) {
            break;
        }

        foreach ($oldMatches as $oldIndex => $percentage) {
            if ($oldIndex < $startInOld) {
                continue;
            }

            // Exclusive end bound, same reasoning as for the new side.
            if ($oldIndex >= $endInOld) {
                break;
            }

            if ($percentage > $bestPercentage) {
                $bestPercentage = $percentage;
                $bestMatch = array(
                    'oldIndex' => $oldIndex,
                    'newIndex' => $newIndex,
                );
            }
        }
    }

    if ($bestMatch === null) {
        return null;
    }

    // A match covers exactly one row on each side: [index, index + 1).
    return new RowMatch(
        $bestMatch['newIndex'],
        $bestMatch['oldIndex'],
        $bestMatch['newIndex'] + 1,
        $bestMatch['oldIndex'] + 1
    );
}
271+
272+
/**
273+
* @param $oldRows
274+
* @param $newRows
275+
* @param $oldMatchData
276+
*/
277+
protected function diffTableRows($oldRows, $newRows, $oldMatchData)
278+
{
279+
$appliedRowSpans = array();
185280
$currentIndexInOld = 0;
186-
$currentIndexInNew = 0;
187281
$oldCount = count($oldRows);
188282
$newCount = count($newRows);
189283
$difference = max($oldCount, $newCount) - min($oldCount, $newCount);
190284

191-
// $this->matchThreshold = ($this->matchThreshold * 0.80);
192-
193285
foreach ($newRows as $newIndex => $row) {
194286
$oldRow = $this->oldTable->getRow($currentIndexInOld);
195287

@@ -240,8 +332,6 @@ protected function diffTableContent()
240332
$this->diffAndAppendRows($row, null, $appliedRowSpans);
241333
}
242334
}
243-
244-
$this->content = $this->htmlFromNode($this->diffTable);
245335
}
246336

247337
/**
@@ -738,6 +828,7 @@ protected function getMatchPercentage(TableRow $oldRow, TableRow $newRow)
738828
{
739829
$matches = array();
740830
$thresholdCount = 0;
831+
$firstCellMatch = false;
741832
foreach ($newRow->getCells() as $newIndex => $newCell) {
742833
$oldCell = $oldRow->getCell($newIndex);
743834

@@ -748,11 +839,21 @@ protected function getMatchPercentage(TableRow $oldRow, TableRow $newRow)
748839
$matches[$newIndex] = $percentage;
749840

750841
if ($percentage > ($this->matchThreshold * 0.50)) {
842+
if ($newIndex === 0 && $percentage > 0.95) {
843+
$firstCellMatch = true;
844+
}
751845
$thresholdCount++;
752846
}
753847
}
754848
}
755849

756-
return (count($matches) > 0) ? $thresholdCount / count($matches) : 0;
850+
$matchPercentage = (count($matches) > 0) ? ($thresholdCount / count($matches)) : 0;
851+
852+
if ($firstCellMatch) {
853+
// @todo: Weight the first cell match higher
854+
$matchPercentage = $matchPercentage * 1.50;
855+
}
856+
857+
return $matchPercentage;
757858
}
758859
}

0 commit comments

Comments
 (0)