Skip to content

Commit 3272337

Browse files
committed
WIP: failsafe to prevent infinite loop, more tests
1 parent 17c166a commit 3272337

File tree

4 files changed

+160
-64
lines changed

4 files changed

+160
-64
lines changed

src/librustdoc/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#![feature(file_buffered)]
1212
#![feature(format_args_nl)]
1313
#![feature(if_let_guard)]
14+
#![feature(iter_advance_by)]
1415
#![feature(iter_intersperse)]
1516
#![feature(round_char_boundary)]
1617
#![feature(rustc_private)]

src/librustdoc/passes/lint/html_tags.rs

Lines changed: 116 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -350,8 +350,8 @@ impl TagParser {
350350
None => break,
351351
};
352352
prev_pos = pos;
353-
// Checking if this is a closing tag (like `</a>` for `<a>`).
354-
if c == '/' && self.tag_name.is_empty() {
353+
if !self.in_attrs && c == '/' && self.tag_name.is_empty() {
354+
// Checking if this is a closing tag (like `</a>` for `<a>`).
355355
self.is_closing = true;
356356
} else if !self.in_attrs && is_valid_for_html_tag_name(c, self.tag_name.is_empty()) {
357357
self.tag_name.push(c);
@@ -389,69 +389,83 @@ impl TagParser {
389389
}
390390
}
391391
self.drop_tag(r, dox, f);
392+
self.tag_parsed();
392393
} else {
393-
let mut is_self_closing = false;
394-
if c != '>' {
395-
'parse_til_gt: {
396-
for (i, c) in text[pos..].char_indices() {
397-
if !c.is_whitespace() {
398-
if let Some(q) = self.quote {
399-
if c == q {
400-
self.quote = None;
401-
self.quote_pos = None;
402-
self.after_eq = false;
403-
}
404-
} else if c == '>' {
405-
// fall through and call `tag_parsed`.
406-
break 'parse_til_gt;
407-
} else if c == '<' {
408-
self.handle_lt_in_tag(range.clone(), pos + i, f);
409-
} else if c == '/' && !self.after_eq {
410-
is_self_closing = true;
411-
} else {
412-
if is_self_closing {
413-
is_self_closing = false;
414-
}
415-
if (c == '"' || c == '\'') && self.after_eq {
416-
self.quote = Some(c);
417-
self.quote_pos = Some(pos + i);
418-
} else if c == '=' {
419-
self.after_eq = true;
420-
}
421-
}
422-
} else if self.quote.is_none() {
423-
self.after_eq = false;
424-
}
425-
}
426-
// if we've run out of text but still haven't found a `>`,
427-
// break out of the outer loop to skip over `tag_parsed`.
428-
// this allows us to either find the `>` in a later event
429-
// or emit a lint about it being missing.
430-
break 'outer_loop;
431-
}
432-
}
433-
if is_self_closing {
434-
// https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus
435-
let valid = ALLOWED_UNCLOSED.contains(&&self.tag_name[..])
436-
|| self.tags.iter().take(pos + 1).any(|(at, _)| {
437-
let at = at.to_lowercase();
438-
at == "svg" || at == "math"
439-
});
440-
if !valid {
441-
f(format!("invalid self-closing HTML tag `{}`", self.tag_name), &r, false);
442-
}
443-
} else if !self.tag_name.is_empty() {
444-
self.tags.push((std::mem::take(&mut self.tag_name), r));
445-
}
394+
self.extract_opening_tag(text, range, r, pos, c, iter, f)
446395
}
447-
self.tag_parsed();
448396
}
449397
break;
450398
}
451399
iter.next();
452400
}
453401
}
454402

403+
fn extract_opening_tag(
404+
&mut self,
405+
text: &str,
406+
range: &Range<usize>,
407+
r: Range<usize>,
408+
pos: usize,
409+
c: char,
410+
iter: &mut Peekable<CharIndices<'_>>,
411+
f: &impl Fn(String, &Range<usize>, bool),
412+
) {
413+
// we can store this as a local, since html5 does require the `/` and `>`
414+
// to not be seperated by whitespace.
415+
let mut is_self_closing = false;
416+
if c != '>' {
417+
'parse_til_gt: {
418+
for (i, c) in text[pos..].char_indices() {
419+
if !c.is_whitespace() {
420+
if let Some(q) = self.quote {
421+
if c == q {
422+
self.quote = None;
423+
self.quote_pos = None;
424+
self.after_eq = false;
425+
}
426+
} else if c == '>' {
427+
break 'parse_til_gt;
428+
} else if c == '<' {
429+
self.handle_lt_in_tag(range.clone(), pos + i, f);
430+
} else if c == '/' && !self.after_eq {
431+
is_self_closing = true;
432+
} else {
433+
if is_self_closing {
434+
is_self_closing = false;
435+
}
436+
if (c == '"' || c == '\'') && self.after_eq {
437+
self.quote = Some(c);
438+
self.quote_pos = Some(pos + i);
439+
} else if c == '=' {
440+
self.after_eq = true;
441+
}
442+
}
443+
} else if self.quote.is_none() {
444+
self.after_eq = false;
445+
}
446+
}
447+
// if we've run out of text but still haven't found a `>`,
448+
// return early without calling `tag_parsed` or emitting lints.
449+
// this allows us to either find the `>` in a later event
450+
// or emit a lint about it being missing.
451+
return;
452+
}
453+
}
454+
if is_self_closing {
455+
// https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus
456+
let valid = ALLOWED_UNCLOSED.contains(&&self.tag_name[..])
457+
|| self.tags.iter().take(pos + 1).any(|(at, _)| {
458+
let at = at.to_lowercase();
459+
at == "svg" || at == "math"
460+
});
461+
if !valid {
462+
f(format!("invalid self-closing HTML tag `{}`", self.tag_name), &r, false);
463+
}
464+
} else if !self.tag_name.is_empty() {
465+
self.tags.push((std::mem::take(&mut self.tag_name), r));
466+
}
467+
self.tag_parsed();
468+
}
455469
/// Finished parsing a tag, reset related data.
456470
fn tag_parsed(&mut self) {
457471
self.tag_name.clear();
@@ -468,8 +482,23 @@ impl TagParser {
468482
f: &impl Fn(String, &Range<usize>, bool),
469483
) {
470484
let mut iter = text.char_indices().peekable();
471-
472-
while let Some((start_pos, c)) = iter.next() {
485+
let mut prev_pos = 0;
486+
loop {
487+
if self.quote.is_some() {
488+
assert!(self.in_attrs);
489+
}
490+
if self.in_attrs &&
491+
let Some(&(start_pos, _)) = iter.peek()
492+
{
493+
self.extract_html_tag(text, &range, dox, start_pos, &mut iter, f);
494+
// if no progress is being made, move forward forcefully.
495+
if prev_pos == start_pos {
496+
iter.next();
497+
}
498+
prev_pos = start_pos;
499+
continue;
500+
}
501+
let Some((start_pos, c)) = iter.next() else { break };
473502
if is_in_comment.is_some() {
474503
if text[start_pos..].starts_with("-->") {
475504
*is_in_comment = None;
@@ -504,20 +533,46 @@ impl TagParser {
504533
}
505534
}
506535

536+
537+
507538
}
508539

509540
#[test]
510541
fn test_extract_tags_nested_unclosed() {
511-
use std::rc::Rc;
512542
use std::cell::RefCell;
513-
use std::ops::Deref;
514543

515544
let mut tagp = TagParser::new();
516545
let mut diags = RefCell::new(Vec::new());
517-
let dox = "<div>\n<br<div>";
546+
let dox = "<div>\n<br</div>";
518547
tagp.extract_tags(dox, 0..dox.len(), dox, &mut None, &|s, r, b| {
519548
diags.borrow_mut().push((s, r.clone(), b));
520549
});
521550
assert_eq!(diags.borrow().len(), 1, "did not get expected diagnostics: {diags:?}");
522551
assert_eq!(diags.borrow()[0].1, 6..9)
523552
}
553+
554+
#[test]
555+
fn test_extract_tags_taglike_in_attr() {
556+
use std::cell::RefCell;
557+
558+
let mut tagp = TagParser::new();
559+
let mut diags = RefCell::new(Vec::new());
560+
let dox = "<img src='<div>'>";
561+
tagp.extract_tags(dox, 0..dox.len(), dox, &mut None, &|s, r, b| {
562+
diags.borrow_mut().push((s, r.clone(), b));
563+
});
564+
assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}");
565+
}
566+
567+
#[test]
568+
fn test_extract_tags_taglike_in_multiline_attr() {
569+
use std::cell::RefCell;
570+
571+
let mut tagp = TagParser::new();
572+
let mut diags = RefCell::new(Vec::new());
573+
let dox = "<img src=\"\nasd\n<div>\n\">";
574+
tagp.extract_tags(dox, 0..dox.len(), dox, &mut None, &|s, r, b| {
575+
diags.borrow_mut().push((s, r.clone(), b));
576+
});
577+
assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}");
578+
}

tests/rustdoc-ui/lints/invalid-html-tags.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,16 +184,20 @@ pub fn no_error_5() {}
184184
/// multiline
185185
/// attr
186186
/// values
187+
/// these are just text, not tags:
188+
/// </div>
189+
/// <p/>
190+
/// <div>
187191
/// ">
188192
pub fn no_error_6() {}
189193

190194
/// <a href="data:text/html,<!DOCTYPE>
191195
/// <html>
192196
/// <body><b>this is allowed for some reason</b></body>
193197
/// </html>
194-
/// ">what</a>'
198+
/// ">what</a>
195199
pub fn no_error_7() {}
196200

197201

198202
/// <p <!-->all of &gt;, !, and - are valid in attribute names</p>
199-
pub fn no_error_7() {}
203+
pub fn no_error_8() {}

tests/rustdoc-ui/lints/invalid-html-tags.stderr

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,5 +139,41 @@ error: incomplete HTML tag `br`
139139
LL | /// <br
140140
| ^^^
141141

142-
error: aborting due to 22 previous errors
142+
error: unopened HTML tag `div`
143+
--> $DIR/invalid-html-tags.rs:188:5
144+
|
145+
LL | /// </div>
146+
| ^^^^^^
147+
148+
error: invalid self-closing HTML tag `p`
149+
--> $DIR/invalid-html-tags.rs:189:5
150+
|
151+
LL | /// <p/>
152+
| ^^
153+
154+
error: unclosed HTML tag `div`
155+
--> $DIR/invalid-html-tags.rs:190:5
156+
|
157+
LL | /// <div>
158+
| ^^^^^
159+
160+
error: unopened HTML tag `a`
161+
--> $DIR/invalid-html-tags.rs:198:11
162+
|
163+
LL | /// ">what</a>
164+
| ^^^^
165+
166+
error: incomplete HTML tag `p`
167+
--> $DIR/invalid-html-tags.rs:202:5
168+
|
169+
LL | /// <p <!-->all of &gt;, !, and - are valid in attribute names</p>
170+
| ^^^
171+
172+
error: Unclosed HTML comment
173+
--> $DIR/invalid-html-tags.rs:202:8
174+
|
175+
LL | /// <p <!-->all of &gt;, !, and - are valid in attribute names</p>
176+
| ^^^^
177+
178+
error: aborting due to 28 previous errors
143179

0 commit comments

Comments
 (0)