@@ -37,6 +37,10 @@ function isWhitespace(char: string): boolean {
3737 return / ^ \s + $ / . test ( char ) ;
3838}
3939
/**
 * Exhaustiveness guard for the `default` branch of a `switch` over a closed union.
 *
 * Because the parameter is typed `never`, a call only type-checks when every
 * member of the union has already been handled by an earlier `case`. Should an
 * unexpected value still arrive at runtime (e.g. through an unchecked cast),
 * this throws instead of handing the value back to the caller.
 *
 * @param a - The value that should be impossible at this point.
 * @throws Error always; the message contains the offending value.
 */
function exhaustive(a: never): never {
  // A function typed `never` must not return normally, so fail loudly.
  throw new Error('Unknown mode ' + String(a));
}
43+
4044
4145const tagRegExp = / ^ \s * < ( [ ^ ! > ] [ ^ > ] * ) > \s * $ / ;
4246/**
@@ -95,7 +99,7 @@ function isEndOfAtomicTag(word: string, tag: string){
9599 return word . substring ( word . length - tag . length - 2 ) === ( '</' + tag ) ;
96100}
97101
/**
 * Matches the opening of a style tag at the start of a string and captures the
 * tag name in group 1.
 *
 * The name must be followed by whitespace or `>`. That boundary keeps short
 * names from matching longer tags (`<b` in `<body>`, `<s` in `<span>`) while
 * still accepting tags that carry attributes (`<strong class="x">`).
 */
const styleTagsRegExp = /^<(strong|em|b|i|q|cite|blockquote|mark|dfn|sup|sub|u|s)[\s>]/;
99103
100104/**
101105 * Checks if the current word is the beginning of a style tag. A style tag is one whose
@@ -109,7 +113,7 @@ const styleTagsRegExp = /^<(strong|em)/;
109113 */
110114
function isStartOfStyleTag(word: string) {
  // Run the shared style-tag pattern against the candidate text.
  const match = styleTagsRegExp.exec(word);
  if (match === null) {
    // No style tag opens at the start of `word`.
    return null;
  }
  // Group 1 of the pattern is the captured tag name.
  return match[1];
}
115119
@@ -205,6 +209,7 @@ function makeMatch(startInBefore: number, startInAfter: number, length: number,
205209 segmentEndInAfter : startInAfter + length - 1
206210 } ; }
207211
/** The set of state names a tokenizer `mode` variable may hold. */
type ParseMode =
  | 'char'
  | 'tag'
  | 'atomic_tag'
  | 'style_tag'
  | 'html_comment'
  | 'whitespace';
208213/**
209214 * Tokenizes a string of HTML.
210215 *
@@ -213,7 +218,7 @@ function makeMatch(startInBefore: number, startInAfter: number, length: number,
213218 * @return {Array.<string> } The list of tokens.
214219 */
215220export function htmlToTokens ( html : string ) : Token [ ] {
216- let mode = 'char' ;
221+ let mode : ParseMode = 'char' ;
217222 let currentWord = '' ;
218223 let currentAtomicTag = '' ;
219224 let currentStyleTag = '' ;
@@ -223,7 +228,7 @@ export function htmlToTokens(html: string): Token[] {
223228 switch ( mode ) {
224229 case 'tag' : {
225230 const atomicTag = isStartOfAtomicTag ( currentWord ) ;
226- const styleTag = isStartOfStyleTag ( currentWord ) ;
231+ const styleTag = isStartOfStyleTag ( currentWord + char ) ;
227232 if ( atomicTag ) {
228233 mode = 'atomic_tag' ;
229234 currentAtomicTag = atomicTag ;
@@ -331,7 +336,7 @@ export function htmlToTokens(html: string): Token[] {
331336 }
332337 break ;
333338 default :
334- throw new Error ( 'Unknown mode ' + mode ) ;
339+ return exhaustive ( mode ) ;
335340 }
336341 }
337342 if ( currentWord ) {
@@ -389,7 +394,7 @@ function getKeyForToken(token: string){
389394 }
390395
391396 // Treat entire style tag as needing to be compared
392- var styleTag = / ^ < ( s t r o n g | e m ) [ \s > ] / . exec ( token ) ;
397+ const styleTag = styleTagsRegExp . exec ( token ) ;
393398 if ( styleTag ) {
394399 return token ;
395400 }
0 commit comments