Skip to content

Commit ea1675a

Browse files
committed
added extra tags, regexp modifications, stronger typing
1 parent e563a7a commit ea1675a

File tree

1 file changed

+11
-6
lines changed

1 file changed

+11
-6
lines changed

src/htmldiff.ts

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ function isWhitespace(char: string): boolean {
3737
return /^\s+$/.test(char);
3838
}
3939

40+
/**
 * Exhaustiveness guard for the `default` branch of a `switch` over a union type.
 *
 * The `never` parameter turns an unhandled union member into a compile-time
 * error. If an unexpected value still arrives at runtime (for example from
 * untyped callers), this throws rather than passing the value back to the
 * caller as if it were a valid result.
 *
 * @param a - The value the type checker believes cannot exist.
 * @throws Error Always; the message names the unexpected value.
 */
function exhaustive(a: never): never {
throw new Error('Unexpected value: ' + String(a));
}
43+
4044

4145
// Matches a string that consists of exactly one tag, optionally surrounded by
// whitespace. The text after `<` may not begin with `!` or `>` (so `<!...>`
// constructs are excluded); group 1 captures everything between `<` and `>`.
const tagRegExp = /^\s*<([^!>][^>]*)>\s*$/;
4246
/**
@@ -95,7 +99,7 @@ function isEndOfAtomicTag(word: string, tag: string){
9599
return word.substring(word.length - tag.length - 2) === ('</' + tag);
96100
}
97101

98-
const styleTagsRegExp = /^<(strong|em)/;
102+
/**
 * Matches the opening of a style tag: `<`, one of the listed tag names
 * (captured in group 1), then whitespace or `>`.
 *
 * Requiring that delimiter keeps a short name from matching a longer tag
 * (`<b` vs `<br>`, `<s` vs `<section>`) while still accepting tags that carry
 * attributes, e.g. `<strong class="x">`.
 */
const styleTagsRegExp = /^<(strong|em|b|i|q|cite|blockquote|mark|dfn|sup|sub|u|s)[\s>]/;
99103

100104
/**
101105
* Checks if the current word is the beginning of a style tag. A style tag is one whose
@@ -109,7 +113,7 @@ const styleTagsRegExp = /^<(strong|em)/;
109113
*/
110114

111115
function isStartOfStyleTag(word: string) {
112-
var result = styleTagsRegExp.exec(word);
116+
const result = styleTagsRegExp.exec(word);
113117
return result && result[1];
114118
}
115119

@@ -205,6 +209,7 @@ function makeMatch(startInBefore: number, startInAfter: number, length: number,
205209
segmentEndInAfter: startInAfter + length - 1
206210
};}
207211

212+
/** The set of values the tokenizer's `mode` variable may hold. */
type ParseMode =
  | 'char'
  | 'tag'
  | 'atomic_tag'
  | 'style_tag'
  | 'html_comment'
  | 'whitespace';
208213
/**
209214
* Tokenizes a string of HTML.
210215
*
@@ -213,7 +218,7 @@ function makeMatch(startInBefore: number, startInAfter: number, length: number,
213218
* @return {Token[]} The list of tokens.
214219
*/
215220
export function htmlToTokens(html: string): Token[] {
216-
let mode = 'char';
221+
let mode: ParseMode = 'char';
217222
let currentWord = '';
218223
let currentAtomicTag = '';
219224
let currentStyleTag = '';
@@ -223,7 +228,7 @@ export function htmlToTokens(html: string): Token[] {
223228
switch (mode){
224229
case 'tag': {
225230
const atomicTag = isStartOfAtomicTag(currentWord);
226-
const styleTag = isStartOfStyleTag(currentWord);
231+
const styleTag = isStartOfStyleTag(currentWord + char);
227232
if (atomicTag){
228233
mode = 'atomic_tag';
229234
currentAtomicTag = atomicTag;
@@ -331,7 +336,7 @@ export function htmlToTokens(html: string): Token[] {
331336
}
332337
break;
333338
default:
334-
throw new Error('Unknown mode ' + mode);
339+
return exhaustive(mode);
335340
}
336341
}
337342
if (currentWord){
@@ -389,7 +394,7 @@ function getKeyForToken(token: string){
389394
}
390395

391396
// Treat entire style tag as needing to be compared
392-
var styleTag = /^<(strong|em)[\s>]/.exec(token);
397+
const styleTag = styleTagsRegExp.exec(token);
393398
if (styleTag) {
394399
return token;
395400
}

0 commit comments

Comments
 (0)