Skip to content

Commit e817cc3

Browse files
committed
html diff support style tags
1 parent 3e2482b commit e817cc3

File tree

1 file changed

+64
-0
lines changed

1 file changed

+64
-0
lines changed

src/htmldiff.ts

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,38 @@ function isEndOfAtomicTag(word: string, tag: string){
9595
return word.substring(word.length - tag.length - 2) === ('</' + tag);
9696
}
9797

98+
/** Matches the opening of a style tag at the start of a token and captures the tag name. */
const styleTagsRegExp = /^<(strong|em)/;

/**
 * Checks if the current word is the beginning of a style tag (`<strong` or `<em`). A style
 * tag is one whose contents should be compared as part of the token, because it does not
 * make sense to wrap the bare tag itself in <ins> and <del> tags.
 *
 * NOTE(review): the pattern is a prefix match with no tag-name boundary, so a word such as
 * '<embed' also yields 'em'. This function only sees the characters read so far; a caller
 * that needs an exact tag name has to inspect the following character itself.
 *
 * @param word The characters of the current token read so far.
 *
 * @returns The name of the style tag if the word starts with one, null otherwise.
 */
function isStartOfStyleTag(word: string): string | null {
  const match = styleTagsRegExp.exec(word);
  // Capture group 1 holds the tag name without the leading '<'.
  return match ? match[1] : null;
}
115+
116+
/**
 * Reports whether the token read so far finishes with the closing tag for `tag`, minus
 * its final '>' (for example '<strong>text</strong').
 *
 * @param word The characters of the current token read so far.
 * @param tag The name of the style tag whose closing tag is expected.
 *
 * @returns True when `word` ends with '</' followed by `tag`, false otherwise.
 */
function isEndOfStyleTag(word: string, tag: string) {
  const closingTag = '</' + tag;
  // A word shorter than the closing tag yields the whole word here, which cannot match.
  const tail = word.slice(-closingTag.length);
  return tail === closingTag;
}
129+
98130
/**
99131
* Checks if a tag is a void tag.
100132
*
@@ -184,19 +216,25 @@ export function htmlToTokens(html: string): Token[] {
184216
let mode = 'char';
185217
let currentWord = '';
186218
let currentAtomicTag = '';
219+
let currentStyleTag = '';
187220
const words = [];
188221

189222
for (const char of html) {
190223
switch (mode){
191224
case 'tag': {
192225
const atomicTag = isStartOfAtomicTag(currentWord);
226+
const styleTag = isStartOfStyleTag(currentWord);
193227
if (atomicTag){
194228
mode = 'atomic_tag';
195229
currentAtomicTag = atomicTag;
196230
currentWord += char;
197231
} else if (isStartOfHTMLComment(currentWord)){
198232
mode = 'html_comment';
199233
currentWord += char;
234+
} else if (styleTag) {
235+
mode = 'style_tag';
236+
currentStyleTag = styleTag;
237+
currentWord = '<nobr>' + currentWord + char;
200238
} else if (isEndOfTag(char)){
201239
currentWord += '>';
202240
words.push(createToken(currentWord));
@@ -229,6 +267,26 @@ export function htmlToTokens(html: string): Token[] {
229267
mode = 'char';
230268
}
231269
break;
270+
case 'style_tag':
271+
if (isEndOfTag(char) && isEndOfStyleTag(currentWord, currentStyleTag)) {
272+
currentWord += '>' + '</nobr>';
273+
words.push(createToken(currentWord));
274+
currentWord = '';
275+
currentStyleTag = '';
276+
mode = 'char';
277+
}
278+
else {
279+
// break up styled blocks into individual styled words
280+
if (/(\s+|&nbsp;|&#160;)/.test(char)) {
281+
currentWord += '</' + currentStyleTag + '>';
282+
if (currentWord) {
283+
words.push(createToken(currentWord));
284+
}
285+
currentWord = '<' + currentStyleTag + '>';
286+
}
287+
currentWord += char;
288+
}
289+
break;
232290
case 'char':
233291
if (isStartOfTag(char)){
234292
if (currentWord){
@@ -330,6 +388,12 @@ function getKeyForToken(token: string){
330388
return `<iframe src="${iframe[1]}"></iframe>`;
331389
}
332390

391+
// Treat entire style tag as needing to be compared
392+
var styleTag = /^<(strong|em)[\s>]/.exec(token);
393+
if (styleTag) {
394+
return token;
395+
}
396+
333397
// If the token is any other element, just grab the tag name.
334398
const tagName = /<([^\s>]+)[\s>]/.exec(token);
335399
if (tagName){

0 commit comments

Comments
 (0)