@@ -95,6 +95,38 @@ function isEndOfAtomicTag(word: string, tag: string){
9595 return word . substring ( word . length - tag . length - 2 ) === ( '</' + tag ) ;
9696}
9797
/**
 * Pattern recognising the opening of a style tag (`<strong` or `<em`) at the very start
 * of a token. Only the prefix is tested, so a longer tag name that shares the prefix
 * (for example `<embed`) matches as well.
 */
const styleTagsRegExp = /^<(strong|em)/;

/**
 * Determines whether the token read so far opens a style tag.
 *
 * @param {string} word The characters of the current token read so far.
 *
 * @return {string|null} The captured tag name (`strong` or `em`) when the token begins
 *   with a style tag, null otherwise.
 */
function isStartOfStyleTag(word: string) {
  const match = styleTagsRegExp.exec(word);
  // exec() yields null on no match; otherwise capture group 1 holds the tag name.
  return match === null ? null : match[1];
}
115+
/**
 * Reports whether the token read so far finishes with the closing tag for `tag`, minus
 * its final `>` (for example `<strong>text</strong` with tag `strong`).
 *
 * @param {string} word The characters of the current token read so far.
 * @param {string} tag The name of the style tag whose closing tag is expected.
 *
 * @return {boolean} True if `word` ends with `</` followed by `tag`, false otherwise.
 */
function isEndOfStyleTag(word: string, tag: string) {
  const closingPrefix = '</' + tag;
  return word.endsWith(closingPrefix);
}
129+
98130/**
99131 * Checks if a tag is a void tag.
100132 *
@@ -184,19 +216,25 @@ export function htmlToTokens(html: string): Token[] {
184216 let mode = 'char' ;
185217 let currentWord = '' ;
186218 let currentAtomicTag = '' ;
219+ let currentStyleTag = '' ;
187220 const words = [ ] ;
188221
189222 for ( const char of html ) {
190223 switch ( mode ) {
191224 case 'tag' : {
192225 const atomicTag = isStartOfAtomicTag ( currentWord ) ;
226+ const styleTag = isStartOfStyleTag ( currentWord ) ;
193227 if ( atomicTag ) {
194228 mode = 'atomic_tag' ;
195229 currentAtomicTag = atomicTag ;
196230 currentWord += char ;
197231 } else if ( isStartOfHTMLComment ( currentWord ) ) {
198232 mode = 'html_comment' ;
199233 currentWord += char ;
234+ } else if ( styleTag ) {
235+ mode = 'style_tag' ;
236+ currentStyleTag = styleTag ;
237+ currentWord = '<nobr>' + currentWord + char ;
200238 } else if ( isEndOfTag ( char ) ) {
201239 currentWord += '>' ;
202240 words . push ( createToken ( currentWord ) ) ;
@@ -229,6 +267,26 @@ export function htmlToTokens(html: string): Token[] {
229267 mode = 'char' ;
230268 }
231269 break ;
270+ case 'style_tag' :
271+ if ( isEndOfTag ( char ) && isEndOfStyleTag ( currentWord , currentStyleTag ) ) {
272+ currentWord += '>' + '</nobr>' ;
273+ words . push ( createToken ( currentWord ) ) ;
274+ currentWord = '' ;
275+ currentStyleTag = '' ;
276+ mode = 'char' ;
277+ }
278+ else {
279+ // break up styled blocks into individual styled words
280+ if ( / ( \s + | & n b s p ; | & # 1 6 0 ; ) / . test ( char ) ) {
281+ currentWord += '</' + currentStyleTag + '>' ;
282+ if ( currentWord ) {
283+ words . push ( createToken ( currentWord ) ) ;
284+ }
285+ currentWord = '<' + currentStyleTag + '>' ;
286+ }
287+ currentWord += char ;
288+ }
289+ break ;
232290 case 'char' :
233291 if ( isStartOfTag ( char ) ) {
234292 if ( currentWord ) {
@@ -330,6 +388,12 @@ function getKeyForToken(token: string){
330388 return `<iframe src="${ iframe [ 1 ] } "></iframe>` ;
331389 }
332390
391+ // Treat entire style tag as needing to be compared
392+ var styleTag = / ^ < ( s t r o n g | e m ) [ \s > ] / . exec ( token ) ;
393+ if ( styleTag ) {
394+ return token ;
395+ }
396+
333397 // If the token is any other element, just grab the tag name.
334398 const tagName = / < ( [ ^ \s > ] + ) [ \s > ] / . exec ( token ) ;
335399 if ( tagName ) {
0 commit comments