Skip to content

Commit a9a6789

Browse files
committed
Add new matching strategy
1 parent 36e102a commit a9a6789

File tree

2 files changed

+153
-18
lines changed

2 files changed

+153
-18
lines changed

lib/Caxy/HtmlDiff/Table/RowMatch.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ class RowMatch
2424
*/
2525
protected $endInOld;
2626

27+
/**
28+
* @var int|null
29+
*/
30+
protected $percentage;
31+
2732
/**
2833
* RowMatch constructor.
2934
*
@@ -32,12 +37,13 @@ class RowMatch
3237
* @param $endInNew
3338
* @param $endInOld
3439
*/
35-
public function __construct($startInNew = 0, $startInOld = 0, $endInNew = 0, $endInOld = 0)
40+
public function __construct($startInNew = 0, $startInOld = 0, $endInNew = 0, $endInOld = 0, $percentage = null)
3641
{
3742
$this->startInNew = $startInNew;
3843
$this->startInOld = $startInOld;
3944
$this->endInNew = $endInNew;
4045
$this->endInOld = $endInOld;
46+
$this->percentage = $percentage;
4147
}
4248

4349
/**

lib/Caxy/HtmlDiff/Table/TableDiff.php

Lines changed: 146 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use Caxy\HtmlDiff\AbstractDiff;
66
use Caxy\HtmlDiff\HtmlDiff;
7+
use Caxy\HtmlDiff\Operation;
78

89
/**
910
* @todo Add getters to TableMatch entity
@@ -15,6 +16,9 @@
1516
*/
1617
class TableDiff extends AbstractDiff
1718
{
19+
const STRATEGY_MATCHING = 'matching';
20+
const STRATEGY_RELATIVE = 'relative';
21+
1822
/**
1923
* @var null|Table
2024
*/
@@ -55,6 +59,8 @@ class TableDiff extends AbstractDiff
5559
*/
5660
protected $purifier;
5761

62+
protected $strategy = self::STRATEGY_MATCHING;
63+
5864
public function __construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs)
5965
{
6066
parent::__construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs);
@@ -63,6 +69,18 @@ public function __construct($oldText, $newText, $encoding, $specialCaseTags, $gr
6369
$this->purifier = new \HTMLPurifier($config);
6470
}
6571

72+
/**
 * Chooses which row-diffing strategy this instance uses.
 *
 * The value is stored as given; no validation is performed here.
 *
 * @param string $strategy Typically self::STRATEGY_MATCHING or self::STRATEGY_RELATIVE.
 *
 * @return $this The same instance, so calls can be chained.
 */
public function setStrategy($strategy)
{
    $this->strategy = $strategy;

    return $this;
}
78+
79+
/**
 * Returns the currently configured row-diffing strategy.
 *
 * @return string The stored strategy value; defaults to self::STRATEGY_MATCHING.
 */
public function getStrategy()
{
    return $this->strategy;
}
83+
6684
public function build()
6785
{
6886
$this->buildTableDoms();
@@ -184,11 +202,127 @@ protected function diffTableContent()
184202

185203
addDebugOutput($matches, __METHOD__);
186204

187-
$this->diffTableRows($oldRows, $newRows, $oldMatchData);
205+
// new solution for diffing rows
206+
switch ($this->strategy) {
207+
case self::STRATEGY_MATCHING:
208+
$this->diffTableRowsWithMatches($oldRows, $newRows, $matches);
209+
break;
210+
211+
case self::STRATEGY_RELATIVE:
212+
$this->diffTableRows($oldRows, $newRows, $oldMatchData);
213+
break;
214+
215+
default:
216+
$this->diffTableRowsWithMatches($oldRows, $newRows, $matches);
217+
break;
218+
}
188219

189220
$this->content = $this->htmlFromNode($this->diffTable);
190221
}
191222

223+
/**
 * Diffs the table rows by converting a list of row matches into an ordered
 * sequence of operations and then processing each operation in turn.
 *
 * Rows lying between the current position and the start of the next match
 * are reported as 'delete' (gap in the old rows only), 'insert' (gap in the
 * new rows only) or 'replace' (gap in both). The matched rows themselves
 * are always reported as an 'equal' operation.
 *
 * @param TableRow[] $oldRows
 * @param TableRow[] $newRows
 * @param RowMatch[] $matches NOTE(review): appears to assume the matches are
 *                            ordered by position in both tables - confirm
 *                            against the caller.
 */
protected function diffTableRowsWithMatches($oldRows, $newRows, $matches)
{
    // Positions (in each table) up to which rows have been accounted for.
    $oldCursor = 0;
    $newCursor = 0;

    // Sentinel match located just past the last row of both tables, so that
    // rows trailing the final real match are still turned into an operation.
    $totalNew = count($newRows);
    $totalOld = count($oldRows);
    $matches[] = new RowMatch($totalNew, $totalOld, $totalNew, $totalOld);

    // Phase 1: build the operation list.
    $operations = array();
    foreach ($matches as $rowMatch) {
        $oldStart = $rowMatch->getStartInOld();
        $newStart = $rowMatch->getStartInNew();

        // A "gap" means unmatched rows exist before this match begins.
        $gapInOld = $oldCursor !== $oldStart;
        $gapInNew = $newCursor !== $newStart;

        if ($gapInOld || $gapInNew) {
            if ($gapInOld && $gapInNew) {
                $gapAction = 'replace';
            } elseif ($gapInNew) {
                $gapAction = 'insert';
            } else {
                $gapAction = 'delete';
            }

            $operations[] = new Operation($gapAction, $oldCursor, $oldStart, $newCursor, $newStart);
        }

        $oldEnd = $rowMatch->getEndInOld();
        $newEnd = $rowMatch->getEndInNew();

        $operations[] = new Operation('equal', $oldStart, $oldEnd, $newStart, $newEnd);

        $oldCursor = $oldEnd;
        $newCursor = $newEnd;
    }

    // Phase 2: process the operations; the row-span state is shared across
    // all of them by reference.
    $appliedRowSpans = array();
    foreach ($operations as $operation) {
        switch ($operation->action) {
            case 'equal':
                $this->processEqualOperation($operation, $oldRows, $newRows, $appliedRowSpans);
                break;

            case 'delete':
                $this->processDeleteOperation($operation, $oldRows, $newRows, $appliedRowSpans);
                break;

            case 'insert':
                $this->processInsertOperation($operation, $oldRows, $newRows, $appliedRowSpans);
                break;

            case 'replace':
                $this->processReplaceOperation($operation, $oldRows, $newRows, $appliedRowSpans);
                break;
        }
    }
}
288+
289+
/**
 * Handles an 'insert' operation: every new-table row in the operation's
 * [startInNew, endInNew) range is handed to diffAndAppendRows() with no
 * old-row counterpart (null).
 *
 * @param Operation  $operation       Supplies the startInNew/endInNew bounds.
 * @param TableRow[] $oldRows         Not read by this method.
 * @param TableRow[] $newRows         Rows of the new table.
 * @param array      $appliedRowSpans Passed through by reference to diffAndAppendRows().
 * @param bool       $forceExpansion  Passed through unchanged to diffAndAppendRows().
 */
protected function processInsertOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans, $forceExpansion = false)
{
    $offset = $operation->startInNew;
    $length = $operation->endInNew - $offset;

    foreach (array_slice($newRows, $offset, $length) as $insertedRow) {
        $this->diffAndAppendRows(null, $insertedRow, $appliedRowSpans, $forceExpansion);
    }
}
296+
297+
/**
 * Handles a 'delete' operation: every old-table row in the operation's
 * [startInOld, endInOld) range is handed to diffAndAppendRows() with no
 * new-row counterpart (null).
 *
 * @param Operation  $operation       Supplies the startInOld/endInOld bounds.
 * @param TableRow[] $oldRows         Rows of the old table.
 * @param TableRow[] $newRows         Not read by this method.
 * @param array      $appliedRowSpans Passed through by reference to diffAndAppendRows().
 * @param bool       $forceExpansion  Passed through unchanged to diffAndAppendRows().
 */
protected function processDeleteOperation($operation, $oldRows, $newRows, &$appliedRowSpans, $forceExpansion = false)
{
    $offset = $operation->startInOld;
    $length = $operation->endInOld - $offset;

    foreach (array_slice($oldRows, $offset, $length) as $removedRow) {
        $this->diffAndAppendRows($removedRow, null, $appliedRowSpans, $forceExpansion);
    }
}
304+
305+
/**
 * Handles an 'equal' operation: pairs the old and new rows covered by the
 * operation position-by-position and hands each pair to diffAndAppendRows().
 *
 * If the new range is longer than the old range, the surplus new rows have
 * no partner; they are reported through addDebugOutput() and skipped.
 *
 * @param Operation  $operation       Supplies the old and new start/end bounds.
 * @param TableRow[] $oldRows         Rows of the old table.
 * @param TableRow[] $newRows         Rows of the new table.
 * @param array      $appliedRowSpans Passed through by reference to diffAndAppendRows().
 */
protected function processEqualOperation($operation, $oldRows, $newRows, &$appliedRowSpans)
{
    // Both slices are re-indexed from 0 so the same index addresses the
    // corresponding row on each side.
    $targetOldRows = array_values(array_slice($oldRows, $operation->startInOld, $operation->endInOld - $operation->startInOld));
    $targetNewRows = array_values(array_slice($newRows, $operation->startInNew, $operation->endInNew - $operation->startInNew));

    foreach ($targetNewRows as $index => $newRow) {
        if (!isset($targetOldRows[$index])) {
            // No old row at this position: log it and move on.
            addDebugOutput('failed finding matching row', __METHOD__);
            continue;
        }

        $this->diffAndAppendRows($targetOldRows[$index], $newRow, $appliedRowSpans);
    }
}
319+
320+
/**
 * Handles a 'replace' operation as a delete of the old range followed by
 * an insert of the new range, both with forced expansion enabled.
 *
 * @param Operation  $operation       Supplies the old and new start/end bounds.
 * @param TableRow[] $oldRows         Rows of the old table.
 * @param TableRow[] $newRows         Rows of the new table.
 * @param array      $appliedRowSpans Shared by reference across both steps.
 */
protected function processReplaceOperation($operation, $oldRows, $newRows, &$appliedRowSpans)
{
    $forceExpansion = true;

    // Order matters: the removed rows are processed before the added ones.
    $this->processDeleteOperation($operation, $oldRows, $newRows, $appliedRowSpans, $forceExpansion);
    $this->processInsertOperation($operation, $oldRows, $newRows, $appliedRowSpans, $forceExpansion);
}
325+
192326
protected function getRowMatches($oldMatchData, $newMatchData)
193327
{
194328
$matches = array();
@@ -240,15 +374,15 @@ protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInN
240374
continue;
241375
}
242376

243-
if ($newIndex > $endInNew) {
377+
if ($newIndex >= $endInNew) {
244378
break;
245379
}
246380
foreach ($oldMatches as $oldIndex => $percentage) {
247381
if ($oldIndex < $startInOld) {
248382
continue;
249383
}
250384

251-
if ($oldIndex > $endInOld) {
385+
if ($oldIndex >= $endInOld) {
252386
break;
253387
}
254388

@@ -257,13 +391,14 @@ protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInN
257391
$bestMatch = array(
258392
'oldIndex' => $oldIndex,
259393
'newIndex' => $newIndex,
394+
'percentage' => $percentage,
260395
);
261396
}
262397
}
263398
}
264399

265400
if ($bestMatch !== null) {
266-
return new RowMatch($bestMatch['newIndex'], $bestMatch['oldIndex'], $bestMatch['newIndex'] + 1, $bestMatch['oldIndex'] + 1);
401+
return new RowMatch($bestMatch['newIndex'], $bestMatch['oldIndex'], $bestMatch['newIndex'] + 1, $bestMatch['oldIndex'] + 1, $bestMatch['percentage']);
267402
}
268403

269404
return null;
@@ -826,33 +961,27 @@ protected function diffAndAppendRows($oldRow, $newRow, &$appliedRowSpans, $force
826961

827962
protected function getMatchPercentage(TableRow $oldRow, TableRow $newRow)
828963
{
829-
$matches = array();
964+
$firstCellWeight = 3;
830965
$thresholdCount = 0;
831-
$firstCellMatch = false;
966+
$totalCount = (min(count($newRow->getCells()), count($oldRow->getCells())) + $firstCellWeight) * 100;
832967
foreach ($newRow->getCells() as $newIndex => $newCell) {
833968
$oldCell = $oldRow->getCell($newIndex);
834969

835970
if ($oldCell) {
836971
$percentage = null;
837972
similar_text($oldCell->getInnerHtml(), $newCell->getInnerHtml(), $percentage);
838973

839-
$matches[$newIndex] = $percentage;
840-
841974
if ($percentage > ($this->matchThreshold * 0.50)) {
842-
if ($newIndex === 0 && $percentage > 0.95) {
843-
$firstCellMatch = true;
975+
$increment = $percentage;
976+
if ($newIndex === 0 && $percentage > 95) {
977+
$increment = $increment * $firstCellWeight;
844978
}
845-
$thresholdCount++;
979+
$thresholdCount += $increment;
846980
}
847981
}
848982
}
849983

850-
$matchPercentage = (count($matches) > 0) ? ($thresholdCount / count($matches)) : 0;
851-
852-
if ($firstCellMatch) {
853-
// @todo: Weight the first cell match higher
854-
$matchPercentage = $matchPercentage * 1.50;
855-
}
984+
$matchPercentage = ($totalCount > 0) ? ($thresholdCount / $totalCount) : 0;
856985

857986
return $matchPercentage;
858987
}

0 commit comments

Comments
 (0)