@@ -41,45 +41,63 @@ export class HtmlParser {
4141 parse ( html : string ) {
4242 let treatAsChars = false
4343 let index , match , characters
44+ // Precompile regex for script/style end tags
45+ let scriptEndRe : RegExp | null = null
46+ let styleEndRe : RegExp | null = null
4447 while ( html . length ) {
4548 // comment
46- if ( html . substring ( 0 , 4 ) === '<!--' ) {
49+ if ( html . startsWith ( '<!--' ) ) {
4750 index = html . indexOf ( '-->' )
4851 if ( index !== - 1 ) {
4952 this . scanner . comment ( html . substring ( 4 , index ) )
50- html = html . substring ( index + 3 )
53+ html = html . slice ( index + 3 )
5154 treatAsChars = false
5255 }
5356 else {
5457 treatAsChars = true
5558 }
5659 }
57-
5860 // end tag
59- else if ( html . substring ( 0 , 2 ) === '</' ) {
60- match = this . endTagRe . exec ( html )
61+ else if ( html . startsWith ( '</' ) ) {
62+ match = html . match ( this . endTagRe )
6163 if ( match ) {
62- html = RegExp . rightContext
64+ const matchLen = match [ 0 ] . length
65+ html = html . slice ( matchLen )
6366 treatAsChars = false
64- this . parseEndTag ( RegExp . lastMatch , match [ 1 ] )
67+ this . parseEndTag ( match [ 0 ] , match [ 1 ] )
6568 }
6669 else {
6770 treatAsChars = true
6871 }
6972 }
70-
7173 // start tag
72- else if ( html . charAt ( 0 ) === '<' ) {
73- match = this . startTagRe . exec ( html )
74+ else if ( html [ 0 ] === '<' ) {
75+ match = html . match ( this . startTagRe )
7476 if ( match ) {
75- html = RegExp . rightContext
77+ const matchLen = match [ 0 ] . length
78+ html = html . slice ( matchLen )
7679 treatAsChars = false
77- const tagName = this . parseStartTag ( RegExp . lastMatch , match [ 1 ] , match )
78- if ( tagName === 'script' || tagName === 'style' ) {
79- index = html . search ( new RegExp ( `<\/${ tagName } ` , 'i' ) )
80+ const tagName = this . parseStartTag ( match [ 0 ] , match [ 1 ] , match )
81+ if ( tagName === 'script' ) {
82+ if ( ! scriptEndRe )
83+ scriptEndRe = / < \/ s c r i p t / i
84+ index = html . search ( scriptEndRe )
8085 if ( index !== - 1 ) {
8186 this . scanner . characters ( html . substring ( 0 , index ) )
82- html = html . substring ( index )
87+ html = html . slice ( index )
88+ treatAsChars = false
89+ }
90+ else {
91+ treatAsChars = true
92+ }
93+ }
94+ else if ( tagName === 'style' ) {
95+ if ( ! styleEndRe )
96+ styleEndRe = / < \/ s t y l e / i
97+ index = html . search ( styleEndRe )
98+ if ( index !== - 1 ) {
99+ this . scanner . characters ( html . substring ( 0 , index ) )
100+ html = html . slice ( index )
83101 treatAsChars = false
84102 }
85103 else {
@@ -91,31 +109,25 @@ export class HtmlParser {
91109 treatAsChars = true
92110 }
93111 }
94-
95112 if ( treatAsChars ) {
96113 index = html . indexOf ( '<' )
97114 let offset = index
98-
99115 if ( index === 0 ) {
100- // First char is a < so find the next one
101- index = html . substring ( 1 ) . indexOf ( '<' )
102- // We're at substring(1) so add 1 to the index
103- offset = offset + 1
116+ index = html . indexOf ( '<' , 1 )
117+ offset = 1 + ( index === - 1 ? html . length : index - 1 )
104118 }
105-
106119 if ( index === - 1 ) {
107120 characters = html
108121 html = ''
109122 }
110123 else {
111124 characters = html . substring ( 0 , offset )
112- html = html . substring ( offset )
125+ html = html . slice ( offset )
113126 }
114-
115- if ( ! this . options . ignoreWhitespaceText || ! / ^ \s * $ / . test ( characters ) )
127+ // Fast whitespace check
128+ if ( ! this . options . ignoreWhitespaceText || ( characters . length && / [ ^ \s ] / . test ( characters ) ) )
116129 this . scanner . characters ( characters )
117130 }
118-
119131 treatAsChars = true
120132 match = null
121133 }
0 commit comments