@@ -21,15 +21,16 @@ var VOIDS = new Set("area base br col command embed hr img input keygen link met
2121
// Markup removed wholesale: doctype, comments, <script>/<style> elements
// (including their contents) and <link>/<meta> tags. TODO: CDATA
//
// Each tag name is followed by a lookahead for whitespace, "/" or ">" so that
// only the exact element is matched; without it `<meta` would also match the
// opening tag of `<metadata>` (and `<link` a custom `<link-card>`), removing
// the opener while leaving its closing tag behind.
var NASTIES = /<!doctype[^>]*>|<!--[\s\S]*?-->|<script(?=[\s\/>])[^>]*>[\s\S]*?<\/script>|<style(?=[\s\/>])[^>]*>[\s\S]*?<\/style>|<link(?=[\s\/>])[^>]*>|<meta(?=[\s\/>])[^>]*>/gmi;
24- var RE_ATTRS = / ( [ \w - ] + ) (?: = " ( [ ^ " ] * ) " | = ' ( [ ^ ' ] * ) ' | = ( \S + ) ) ? / gm;
// Patterns for the individual pieces of markup. All of them carry the sticky
// (`y`) flag, so exec() only matches starting exactly at the regex's
// `lastIndex`, and each one also swallows the whitespace around its token.
var RE = {
	// start of an opening tag; group 1 is the tag name, e.g. "<div"
	NAME: /\s*<([\w-]+)\s*/myi,
	// a single attribute; group 1 is the name, and the value (if any) is in
	// group 2 (double-quoted), group 3 (single-quoted) or group 4 (unquoted).
	// The unquoted form stops at whitespace or ">" so that `<a href=x>` leaves
	// the ">" for TAIL instead of capturing "x>" as the value.
	ATTR: /\s*([\w-]+)(?:="([^"]*)"|='([^']*)'|=([^\s>]+))?\s*/myi,
	// end of an opening tag; group 1 is ">" or "/>"
	TAIL: /\s*(\/?>)\s*/myi,
	// run of text up to (not including) the next "<"
	TEXT: /\s*[^<]*/my,
	// closing tag, e.g. "</div>"
	CLOSE: /\s*<\/[\w-]+>\s*/myi,
};
3031
3132function tokenize ( html ) {
32- var pos = 0 , m , tokens = [ ] ;
33+ var pos = 0 , m , m2 , tokens = [ ] ;
3334
3435 function syncPos ( re ) {
3536 pos = re . lastIndex ;
@@ -38,34 +39,39 @@ function tokenize(html) {
3839 }
3940
4041 function next ( ) {
41- m = RE . TAG_CLOSE . exec ( html ) ;
42+ m = RE . CLOSE . exec ( html ) ;
4243
4344 if ( m != null ) {
44- syncPos ( RE . TAG_CLOSE ) ;
45+ syncPos ( RE . CLOSE ) ;
4546 tokens . push ( TAG_CLOSE ) ;
4647 return ;
4748 }
4849
49- m = RE . TAG_HEAD . exec ( html ) ;
50+ m = RE . NAME . exec ( html ) ;
5051
5152 if ( m != null ) {
52- syncPos ( RE . TAG_HEAD ) ;
53+ syncPos ( RE . NAME ) ;
5354 var tag = m [ 1 ] ;
5455 tokens . push ( TAG_OPEN , tag ) ;
5556
56- var attrs = m [ 2 ] ;
57+ var attrMap ;
5758
58- if ( attrs != null ) {
59- var attrMap = new Map ( ) ;
60- var m2 ;
61- while ( m2 = RE_ATTRS . exec ( attrs ) )
62- { attrMap . set ( m2 [ 1 ] , ( m2 [ 2 ] || m2 [ 3 ] || m2 [ 4 ] || '' ) . trim ( ) ) ; }
63- tokens . push ( ATTRS , attrMap ) ;
59+ while ( m2 = RE . ATTR . exec ( html ) ) {
60+ syncPos ( RE . ATTR ) ;
61+ attrMap = attrMap || new Map ( ) ;
62+ attrMap . set ( m2 [ 1 ] , ( m2 [ 2 ] || m2 [ 3 ] || m2 [ 4 ] || '' ) . trim ( ) ) ;
6463 }
6564
66- if ( VOIDS . has ( tag ) || attrs && attrs . endsWith ( "/" ) )
65+ if ( attrMap )
66+ { tokens . push ( ATTRS , attrMap ) ; }
67+
68+ m2 = RE . TAIL . exec ( html ) ;
69+
70+ if ( VOIDS . has ( tag ) || m2 [ 1 ] == "/>" )
6771 { tokens . push ( TAG_CLOSE ) ; }
6872
73+ syncPos ( RE . TAIL ) ;
74+
6975 return ;
7076 }
7177
0 commit comments