@@ -14,6 +14,7 @@ class HtmlDiff
14
14
private $ specialCaseOpeningTags = array ();
15
15
private $ specialCaseClosingTags = array ();
16
16
private $ specialCaseTags = array ('strong ' , 'b ' , 'i ' , 'big ' , 'small ' , 'u ' , 'sub ' , 'sup ' , 'strike ' , 's ' , 'p ' );
17
+ private $ specialCaseChars = array ('. ' , ', ' , '( ' , ') ' , '\'' );
17
18
private $ groupDiffs = true ;
18
19
19
20
public function __construct ($ oldText , $ newText , $ encoding = 'UTF-8 ' , $ specialCaseTags = array (), $ groupDiffs = true )
@@ -26,6 +27,31 @@ public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCas
26
27
27
28
$ this ->setSpecialCaseTags ($ specialCaseTags );
28
29
}
30
+
31
/**
 * Replaces the whole list of special-case characters with the given one.
 *
 * @param array $chars Characters that become the new special-case list.
 */
public function setSpecialCaseChars(array $chars)
{
    $this->specialCaseChars = $chars;
}
35
+
36
/**
 * Returns the list of special-case characters currently stored on this instance.
 *
 * @return array
 */
public function getSpecialCaseChars()
{
    return $this->specialCaseChars;
}
40
+
41
/**
 * Appends a character to the special-case list unless it is already present.
 *
 * @param mixed $char Character to add; the default list holds one-character strings.
 */
public function addSpecialCaseChar($char)
{
    // Strict membership test: a loose in_array() treats values such as true
    // (or 0 on PHP < 8) as equal to any stored string and would silently
    // skip the insert.
    if (in_array($char, $this->specialCaseChars, true)) {
        return;
    }

    $this->specialCaseChars[] = $char;
}
47
+
48
/**
 * Removes a character from the special-case list; does nothing when it is absent.
 *
 * @param mixed $char Character to remove; the default list holds one-character strings.
 */
public function removeSpecialCaseChar($char)
{
    // Strict search: a loose array_search() would match the first entry for
    // inputs such as true (or 0 on PHP < 8) and delete an unrelated character.
    $key = array_search($char, $this->specialCaseChars, true);
    if ($key === false) {
        return;
    }

    unset($this->specialCaseChars[$key]);

    // Re-index so the list keeps sequential keys after the removal.
    $this->specialCaseChars = array_values($this->specialCaseChars);
}
29
55
30
56
public function setSpecialCaseTags (array $ tags = array ())
31
57
{
@@ -173,13 +199,18 @@ private function splitInputsToWords()
173
199
$ this ->oldWords = $ this ->convertHtmlToListOfWords ( $ this ->explode ( $ this ->oldText ) );
174
200
$ this ->newWords = $ this ->convertHtmlToListOfWords ( $ this ->explode ( $ this ->newText ) );
175
201
}
202
+
203
/**
 * Tells whether the given text is purely alphanumeric once every
 * special-case character has been stripped from it.
 *
 * @param string $text Text to examine.
 *
 * @return bool Result of ctype_alnum() on the stripped text.
 */
private function isPartOfWord($text)
{
    $withoutSpecialChars = str_replace($this->specialCaseChars, '', $text);

    return ctype_alnum($withoutSpecialChars);
}
176
207
177
208
private function convertHtmlToListOfWords ($ characterString )
178
209
{
179
210
$ mode = 'character ' ;
180
211
$ current_word = '' ;
181
212
$ words = array ();
182
- foreach ($ characterString as $ character ) {
213
+ foreach ($ characterString as $ i => $ character ) {
183
214
switch ($ mode ) {
184
215
case 'character ' :
185
216
if ( $ this ->isStartOfTag ( $ character ) ) {
@@ -195,7 +226,10 @@ private function convertHtmlToListOfWords($characterString)
195
226
$ current_word = $ character ;
196
227
$ mode = 'whitespace ' ;
197
228
} else {
198
- if ( ctype_alnum ( $ character ) && ( strlen ($ current_word ) == 0 || ctype_alnum ( $ current_word ) ) ) {
229
+ if (
230
+ (ctype_alnum ($ character ) && (strlen ($ current_word ) == 0 || $ this ->isPartOfWord ($ current_word ))) ||
231
+ (in_array ($ character , $ this ->specialCaseChars ) && isset ($ characterString [$ i +1 ]) && $ this ->isPartOfWord ($ characterString [$ i +1 ]))
232
+ ) {
199
233
$ current_word .= $ character ;
200
234
} else {
201
235
$ words [] = $ current_word ;
0 commit comments