Skip to content

Commit 17c166a

Browse files
committed
WIP: support for multiline html attributes and attributes named <!--
1 parent 60d8bbf commit 17c166a

File tree

2 files changed

+61
-35
lines changed

2 files changed

+61
-35
lines changed

src/librustdoc/passes/lint/html_tags.rs

Lines changed: 41 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -152,20 +152,27 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: &
152152
}
153153
}
154154

155-
if !tagp.tag_name.is_empty() {
156-
report_diag(format!("incomplete HTML tag `{}`", &tagp.tag_name), &(tagp.tag_start_pos..dox.len()), false);
157-
}
158-
159-
for (tag, range) in tagp.tags.iter().filter(|(t, range)| {
160-
let t = t.to_lowercase();
161-
!is_implicitly_self_closing(&t, range.clone(), dox)
162-
}) {
163-
report_diag(format!("unclosed HTML tag `{tag}`"), range, true);
164-
}
165-
166155
if let Some(range) = is_in_comment {
167156
report_diag("Unclosed HTML comment".to_string(), &range, false);
157+
} else if let &Some(quote_pos) = &tagp.quote_pos {
158+
let qr = Range { start: quote_pos, end: quote_pos };
159+
report_diag(
160+
format!("unclosed quoted HTML attribute on tag `{}`", &tagp.tag_name),
161+
&qr,
162+
false,
163+
);
164+
} else {
165+
if !tagp.tag_name.is_empty() {
166+
report_diag(format!("incomplete HTML tag `{}`", &tagp.tag_name), &(tagp.tag_start_pos..dox.len()), false);
167+
}
168+
for (tag, range) in tagp.tags.iter().filter(|(t, range)| {
169+
let t = t.to_lowercase();
170+
!is_implicitly_self_closing(&t, range.clone(), dox)
171+
}) {
172+
report_diag(format!("unclosed HTML tag `{tag}`"), range, true);
173+
}
168174
}
175+
169176
}
170177

171178
/// These tags are interpreted as self-closing if they lack an explict closing tag.
@@ -251,16 +258,25 @@ struct TagParser {
251258
is_closing: bool,
252259
/// `true` if we are within a tag, but not within its name.
253260
in_attrs: bool,
261+
/// If we are in a quoted attribute, what quote char does it use?
262+
///
263+
/// This needs to be stored in the struct since HTML5 allows newlines in quoted attrs.
264+
quote: Option<char>,
265+
quote_pos: Option<usize>,
266+
after_eq: bool,
254267
}
255268

256269
impl TagParser {
257270
fn new() -> Self {
258271
Self{
259272
tags: Vec::new(),
260273
tag_name: String::with_capacity(8),
261-
is_closing: false,
262274
tag_start_pos: 0,
275+
is_closing: false,
263276
in_attrs: false,
277+
quote: None,
278+
quote_pos: None,
279+
after_eq:false,
264280
}
265281
}
266282

@@ -375,39 +391,36 @@ impl TagParser {
375391
self.drop_tag(r, dox, f);
376392
} else {
377393
let mut is_self_closing = false;
378-
let mut quote_pos = None;
379394
if c != '>' {
380-
let mut quote = None;
381-
let mut after_eq = false;
382395
'parse_til_gt: {
383396
for (i, c) in text[pos..].char_indices() {
384397
if !c.is_whitespace() {
385-
if let Some(q) = quote {
398+
if let Some(q) = self.quote {
386399
if c == q {
387-
quote = None;
388-
quote_pos = None;
389-
after_eq = false;
400+
self.quote = None;
401+
self.quote_pos = None;
402+
self.after_eq = false;
390403
}
391404
} else if c == '>' {
392405
// fall through and call `tag_parsed`.
393406
break 'parse_til_gt;
394407
} else if c == '<' {
395408
self.handle_lt_in_tag(range.clone(), pos + i, f);
396-
} else if c == '/' && !after_eq {
409+
} else if c == '/' && !self.after_eq {
397410
is_self_closing = true;
398411
} else {
399412
if is_self_closing {
400413
is_self_closing = false;
401414
}
402-
if (c == '"' || c == '\'') && after_eq {
403-
quote = Some(c);
404-
quote_pos = Some(pos + i);
415+
if (c == '"' || c == '\'') && self.after_eq {
416+
self.quote = Some(c);
417+
self.quote_pos = Some(pos + i);
405418
} else if c == '=' {
406-
after_eq = true;
419+
self.after_eq = true;
407420
}
408421
}
409-
} else if quote.is_none() {
410-
after_eq = false;
422+
} else if self.quote.is_none() {
423+
self.after_eq = false;
411424
}
412425
}
413426
// if we've run out of text but still haven't found a `>`,
@@ -417,14 +430,6 @@ impl TagParser {
417430
break 'outer_loop;
418431
}
419432
}
420-
if let Some(quote_pos) = quote_pos {
421-
let qr = Range { start: quote_pos, end: quote_pos };
422-
f(
423-
format!("unclosed quoted HTML attribute on tag `{}`", &self.tag_name),
424-
&qr,
425-
false,
426-
);
427-
}
428433
if is_self_closing {
429434
// https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus
430435
let valid = ALLOWED_UNCLOSED.contains(&&self.tag_name[..])
@@ -470,7 +475,8 @@ impl TagParser {
470475
*is_in_comment = None;
471476
}
472477
} else if c == '<' {
473-
if text[start_pos..].starts_with("<!--") {
478+
// "<!--" is a valid attribute name under html5, so don't treat it as a comment if we're in a tag.
479+
if self.tag_name.is_empty() && text[start_pos..].starts_with("<!--") {
474480
// We skip the "!--" part. (Once `advance_by` is stable, might be nice to use it!)
475481
iter.next();
476482
iter.next();

tests/rustdoc-ui/lints/invalid-html-tags.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,3 +177,23 @@ pub fn u() {}
177177

178178
/// <a href=">" alt="<">html5 allows this</a>
179179
pub fn no_error_5() {}
180+
181+
/// <img title="
182+
/// html5
183+
/// allows
184+
/// multiline
185+
/// attr
186+
/// values
187+
/// ">
188+
pub fn no_error_6() {}
189+
190+
/// <a href="data:text/html,<!DOCTYPE>
191+
/// <html>
192+
/// <body><b>this is allowed for some reason</b></body>
193+
/// </html>
194+
/// ">what</a>'
195+
pub fn no_error_7() {}
196+
197+
198+
/// <p <!-->all of &gt;, !, and - are valid in attribute names</p>
199+
pub fn no_error_7() {}

0 commit comments

Comments
 (0)