Skip to content

Commit 25bce7b

Browse files
authored
Ported ListDiff to DOMDocument
Ported ListDiff to DOMDocument
2 parents e7628e3 + 2a5a3ce commit 25bce7b

21 files changed

+318
-710
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,8 @@ php-htmldiff is available under [GNU General Public License, version 2][gnu]. Se
199199
* Performance improvements (we have 1 benchmark test, we should probably get more)
200200
* Algorithm improvements - trim identical text at the start and end of the inputs, and store nested diff results in memory for re-use (like we do with caching)
201201
* Benchmark using DOMDocument vs. alternatives vs. string parsing
202+
* Consider not using string parsing for HtmlDiff in order to avoid having to create many DOMDocument instances in ListDiff and TableDiff
202203
* Benchmarking
203-
* Look into removing dependency on php-simple-html-dom-parser library - possibly find alternative or no library at all. Consider how this affects performance.
204204
* Refactoring (but... tests first)
205205
* Overall design/architecture improvements
206206
* API improvements so a new HtmlDiff isn't required for each new diff (especially so that configuration can be re-used)

composer.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
"require": {
2222
"php": ">=7.3",
2323
"ezyang/htmlpurifier": "^4.7",
24-
"kub-at/php-simple-html-dom-parser": "^1.7"
24+
"ext-dom": "*",
25+
"ext-mbstring": "*"
2526
},
2627
"require-dev": {
2728
"phpunit/phpunit": "~9.0",

lib/Caxy/HtmlDiff/AbstractDiff.php

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
namespace Caxy\HtmlDiff;
44

55
use Caxy\HtmlDiff\Util\MbStringUtil;
6+
use HTMLPurifier;
7+
use HTMLPurifier_Config;
68

79
/**
810
* Class AbstractDiff.
@@ -66,12 +68,12 @@ abstract class AbstractDiff
6668
protected $diffCaches = array();
6769

6870
/**
69-
* @var \HTMLPurifier|null
71+
* @var HTMLPurifier|null
7072
*/
7173
protected $purifier;
7274

7375
/**
74-
* @var \HTMLPurifier_Config|null
76+
* @var HTMLPurifier_Config|null
7577
*/
7678
protected $purifierConfig = null;
7779

@@ -129,7 +131,7 @@ public function initPurifier($defaultPurifierSerializerCache = null)
129131
if (null !== $this->purifierConfig) {
130132
$HTMLPurifierConfig = $this->purifierConfig;
131133
} else {
132-
$HTMLPurifierConfig = \HTMLPurifier_Config::createDefault();
134+
$HTMLPurifierConfig = HTMLPurifier_Config::createDefault();
133135
}
134136

135137
// Cache.SerializerPath defaults to Null and sets
@@ -144,7 +146,7 @@ public function initPurifier($defaultPurifierSerializerCache = null)
144146
// created by the web/php user (www-user, php-fpm, etc.)
145147
$HTMLPurifierConfig->set('Cache.SerializerPermissions', 0777);
146148

147-
$this->purifier = new \HTMLPurifier($HTMLPurifierConfig);
149+
$this->purifier = new HTMLPurifier($HTMLPurifierConfig);
148150
}
149151

150152
/**
@@ -373,33 +375,13 @@ public function isGroupDiffs()
373375
}
374376

375377
/**
376-
* @param \HTMLPurifier_Config $config
378+
* @param HTMLPurifier_Config $config
377379
*/
378-
public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
380+
public function setHTMLPurifierConfig(HTMLPurifier_Config $config)
379381
{
380382
$this->purifierConfig = $config;
381383
}
382384

383-
/**
384-
* @param string $tag
385-
*
386-
* @return string
387-
*/
388-
protected function getOpeningTag($tag)
389-
{
390-
return '/<'.$tag.'[^>]*/i';
391-
}
392-
393-
/**
394-
* @param string $tag
395-
*
396-
* @return string
397-
*/
398-
protected function getClosingTag($tag)
399-
{
400-
return '</'.$tag.'>';
401-
}
402-
403385
/**
404386
* @param string $html
405387
*

lib/Caxy/HtmlDiff/HtmlDiff.php

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ protected function processInsertOperation($operation, $cssClass)
291291
}
292292
}
293293
}
294+
294295
$this->insertTag('ins', $cssClass, $text);
295296
}
296297

@@ -544,8 +545,7 @@ protected function insertTag($tag, $cssClass, &$words)
544545
$specialCaseTagInjectionIsBefore = false;
545546

546547
if (count($nonTags) !== 0) {
547-
$text = $this->wrapText(implode('', $nonTags), $tag, $cssClass);
548-
$this->content .= $text;
548+
$this->content .= $this->wrapText(implode('', $nonTags), $tag, $cssClass);
549549
} else {
550550
$firstOrDefault = false;
551551
foreach ($this->config->getSpecialCaseOpeningTags() as $x) {
@@ -615,15 +615,12 @@ protected function checkCondition($word, $condition)
615615
return $condition == 'tag' ? $this->isTag($word) : !$this->isTag($word);
616616
}
617617

618-
/**
619-
* @param string $text
620-
* @param string $tagName
621-
* @param string $cssClass
622-
*
623-
* @return string
624-
*/
625-
protected function wrapText($text, $tagName, $cssClass)
618+
protected function wrapText(string $text, string $tagName, string $cssClass) : string
626619
{
620+
if (trim($text) === '') {
621+
return '';
622+
}
623+
627624
return sprintf('<%1$s class="%2$s">%3$s</%1$s>', $tagName, $cssClass, $text);
628625
}
629626

0 commit comments

Comments
 (0)