@@ -350,8 +350,8 @@ impl TagParser {
350350 None => break ,
351351 } ;
352352 prev_pos = pos;
353- // Checking if this is a closing tag (like `</a>` for `<a>`).
354- if c == '/' && self . tag_name . is_empty ( ) {
353+ if ! self . in_attrs && c == '/' && self . tag_name . is_empty ( ) {
354+ // Checking if this is a closing tag (like `</a>` for `<a>`).
355355 self . is_closing = true ;
356356 } else if !self . in_attrs && is_valid_for_html_tag_name ( c, self . tag_name . is_empty ( ) ) {
357357 self . tag_name . push ( c) ;
@@ -389,69 +389,83 @@ impl TagParser {
389389 }
390390 }
391391 self . drop_tag ( r, dox, f) ;
392+ self . tag_parsed ( ) ;
392393 } else {
393- let mut is_self_closing = false ;
394- if c != '>' {
395- ' parse_til_gt: {
396- for ( i, c) in text[ pos..] . char_indices ( ) {
397- if !c. is_whitespace ( ) {
398- if let Some ( q) = self . quote {
399- if c == q {
400- self . quote = None ;
401- self . quote_pos = None ;
402- self . after_eq = false ;
403- }
404- } else if c == '>' {
405- // fall through and call `tag_parsed`.
406- break ' parse_til_gt;
407- } else if c == '<' {
408- self . handle_lt_in_tag ( range. clone ( ) , pos + i, f) ;
409- } else if c == '/' && !self . after_eq {
410- is_self_closing = true ;
411- } else {
412- if is_self_closing {
413- is_self_closing = false ;
414- }
415- if ( c == '"' || c == '\'' ) && self . after_eq {
416- self . quote = Some ( c) ;
417- self . quote_pos = Some ( pos + i) ;
418- } else if c == '=' {
419- self . after_eq = true ;
420- }
421- }
422- } else if self . quote . is_none ( ) {
423- self . after_eq = false ;
424- }
425- }
426- // if we've run out of text but still haven't found a `>`,
427- // break out of the outer loop to skip over `tag_parsed`.
428- // this allows us to either find the `>` in a later event
429- // or emit a lint about it being missing.
430- break ' outer_loop;
431- }
432- }
433- if is_self_closing {
434- // https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus
435- let valid = ALLOWED_UNCLOSED . contains ( & & self . tag_name [ ..] )
436- || self . tags . iter ( ) . take ( pos + 1 ) . any ( |( at, _) | {
437- let at = at. to_lowercase ( ) ;
438- at == "svg" || at == "math"
439- } ) ;
440- if !valid {
441- f ( format ! ( "invalid self-closing HTML tag `{}`" , self . tag_name) , & r, false ) ;
442- }
443- } else if !self . tag_name . is_empty ( ) {
444- self . tags . push ( ( std:: mem:: take ( & mut self . tag_name ) , r) ) ;
445- }
394+ self . extract_opening_tag ( text, range, r, pos, c, iter, f)
446395 }
447- self . tag_parsed ( ) ;
448396 }
449397 break ;
450398 }
451399 iter. next ( ) ;
452400 }
453401 }
454402
403+ fn extract_opening_tag (
404+ & mut self ,
405+ text : & str ,
406+ range : & Range < usize > ,
407+ r : Range < usize > ,
408+ pos : usize ,
409+ c : char ,
410+ iter : & mut Peekable < CharIndices < ' _ > > ,
411+ f : & impl Fn ( String , & Range < usize > , bool ) ,
412+ ) {
413+ // we can store this as a local, since html5 does require the `/` and `>`
414+ // to not be seperated by whitespace.
415+ let mut is_self_closing = false ;
416+ if c != '>' {
417+ ' parse_til_gt: {
418+ for ( i, c) in text[ pos..] . char_indices ( ) {
419+ if !c. is_whitespace ( ) {
420+ if let Some ( q) = self . quote {
421+ if c == q {
422+ self . quote = None ;
423+ self . quote_pos = None ;
424+ self . after_eq = false ;
425+ }
426+ } else if c == '>' {
427+ break ' parse_til_gt;
428+ } else if c == '<' {
429+ self . handle_lt_in_tag ( range. clone ( ) , pos + i, f) ;
430+ } else if c == '/' && !self . after_eq {
431+ is_self_closing = true ;
432+ } else {
433+ if is_self_closing {
434+ is_self_closing = false ;
435+ }
436+ if ( c == '"' || c == '\'' ) && self . after_eq {
437+ self . quote = Some ( c) ;
438+ self . quote_pos = Some ( pos + i) ;
439+ } else if c == '=' {
440+ self . after_eq = true ;
441+ }
442+ }
443+ } else if self . quote . is_none ( ) {
444+ self . after_eq = false ;
445+ }
446+ }
447+ // if we've run out of text but still haven't found a `>`,
448+ // return early without calling `tag_parsed` or emitting lints.
449+ // this allows us to either find the `>` in a later event
450+ // or emit a lint about it being missing.
451+ return ;
452+ }
453+ }
454+ if is_self_closing {
455+ // https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus
456+ let valid = ALLOWED_UNCLOSED . contains ( & & self . tag_name [ ..] )
457+ || self . tags . iter ( ) . take ( pos + 1 ) . any ( |( at, _) | {
458+ let at = at. to_lowercase ( ) ;
459+ at == "svg" || at == "math"
460+ } ) ;
461+ if !valid {
462+ f ( format ! ( "invalid self-closing HTML tag `{}`" , self . tag_name) , & r, false ) ;
463+ }
464+ } else if !self . tag_name . is_empty ( ) {
465+ self . tags . push ( ( std:: mem:: take ( & mut self . tag_name ) , r) ) ;
466+ }
467+ self . tag_parsed ( ) ;
468+ }
455469 /// Finished parsing a tag, reset related data.
456470 fn tag_parsed ( & mut self ) {
457471 self . tag_name . clear ( ) ;
@@ -468,8 +482,23 @@ impl TagParser {
468482 f : & impl Fn ( String , & Range < usize > , bool ) ,
469483 ) {
470484 let mut iter = text. char_indices ( ) . peekable ( ) ;
471-
472- while let Some ( ( start_pos, c) ) = iter. next ( ) {
485+ let mut prev_pos = 0 ;
486+ loop {
487+ if self . quote . is_some ( ) {
488+ assert ! ( self . in_attrs) ;
489+ }
490+ if self . in_attrs &&
491+ let Some ( & ( start_pos, _) ) = iter. peek ( )
492+ {
493+ self . extract_html_tag ( text, & range, dox, start_pos, & mut iter, f) ;
494+ // if no progress is being made, move forward forcefully.
495+ if prev_pos == start_pos {
496+ iter. next ( ) ;
497+ }
498+ prev_pos = start_pos;
499+ continue ;
500+ }
501+ let Some ( ( start_pos, c) ) = iter. next ( ) else { break } ;
473502 if is_in_comment. is_some ( ) {
474503 if text[ start_pos..] . starts_with ( "-->" ) {
475504 * is_in_comment = None ;
@@ -504,20 +533,46 @@ impl TagParser {
504533 }
505534 }
506535
536+
537+
507538}
508539
/// Feeds a `<div>` followed by an unterminated `<br` and a closing tag through
/// `TagParser::extract_tags` and expects exactly one diagnostic.
#[test]
fn test_extract_tags_nested_unclosed() {
    use std::cell::RefCell;

    let mut tagp = TagParser::new();
    // `RefCell` lets the `Fn` callback record diagnostics through a shared
    // borrow, so the binding itself does not need to be `mut`.
    let diags = RefCell::new(Vec::new());
    let dox = "<div>\n <br</ div>";
    tagp.extract_tags(dox, 0..dox.len(), dox, &mut None, &|s, r, b| {
        diags.borrow_mut().push((s, r.clone(), b));
    });
    assert_eq!(diags.borrow().len(), 1, "did not get expected diagnostics: {diags:?}");
    // NOTE(review): presumably the span of the unterminated `<br`; with the literal
    // exactly as written above `<br` sits at bytes 7..10, so confirm the literal's
    // whitespace against this expected range.
    assert_eq!(diags.borrow()[0].1, 6..9);
}
553+
/// A tag-like string inside a single-quoted attribute value must not produce
/// any diagnostics.
#[test]
fn test_extract_tags_taglike_in_attr() {
    use std::cell::RefCell;

    let mut tagp = TagParser::new();
    // `RefCell` lets the `Fn` callback record diagnostics through a shared
    // borrow, so the binding itself does not need to be `mut`.
    let diags = RefCell::new(Vec::new());
    let dox = "<img src='<div>'>";
    tagp.extract_tags(dox, 0..dox.len(), dox, &mut None, &|s, r, b| {
        diags.borrow_mut().push((s, r.clone(), b));
    });
    assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}");
}
566+
/// A tag-like string inside a double-quoted attribute value that spans several
/// lines must not produce any diagnostics.
#[test]
fn test_extract_tags_taglike_in_multiline_attr() {
    use std::cell::RefCell;

    let mut tagp = TagParser::new();
    // `RefCell` lets the `Fn` callback record diagnostics through a shared
    // borrow, so the binding itself does not need to be `mut`.
    let diags = RefCell::new(Vec::new());
    let dox = "<img src=\" \n asd\n <div>\n \" >";
    tagp.extract_tags(dox, 0..dox.len(), dox, &mut None, &|s, r, b| {
        diags.borrow_mut().push((s, r.clone(), b));
    });
    assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}");
}
0 commit comments