Skip to content

Commit ed3271d

Browse files
committed
Improve broken link to catch more cases and span point to whole link
1 parent bc3ff86 commit ed3271d

File tree

3 files changed

+157
-43
lines changed

3 files changed

+157
-43
lines changed

clippy_lints/src/doc/broken_link.rs

Lines changed: 98 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,51 +11,108 @@ pub fn check(cx: &LateContext<'_>, attrs: &[Attribute]) {
1111
}
1212
}
1313

14+
/// Scan AST attributes looking up in doc comments for broken links
15+
/// which rustdoc won't be able to properly create link tags later.
16+
#[derive(Debug)]
1417
struct BrokenLinkLoader {
18+
/// List of spans for detected broken links.
1519
spans_broken_link: Vec<Span>,
20+
21+
/// Mark it has detected a link and it is processing whether
22+
/// or not it is broken.
1623
active: bool,
24+
25+
/// Keep track of the span for the processing broken link.
26+
active_span: Option<Span>,
27+
28+
/// Keep track where exactly the link definition has started in the code.
29+
active_pos_start: u32,
30+
31+
/// Mark it is processing the link text tag.
1732
processing_link_text: bool,
33+
34+
/// Mark it is processing the link url tag.
1835
processing_link_url: bool,
36+
37+
/// Mark it is reading the url tag content. It will be false if the loader
38+
/// got to the url tag processing, but all the chars read so far were just
39+
/// whitespaces.
40+
reading_link_url: bool,
41+
42+
/// Mark the url url isn't empty, but it still being processed in a new line.
43+
reading_link_url_new_line: bool,
44+
45+
/// Mark the current link url is broken across multiple lines.
46+
url_multiple_lines: bool,
47+
48+
/// Mark the link's span start position.
1949
start: u32,
2050
}
2151

2252
impl BrokenLinkLoader {
53+
/// Return spans of broken links.
2354
fn collect_spans_broken_link(attrs: &[Attribute]) -> Vec<Span> {
2455
let mut loader = BrokenLinkLoader {
2556
spans_broken_link: vec![],
2657
active: false,
58+
active_pos_start: 0,
59+
active_span: None,
2760
processing_link_text: false,
2861
processing_link_url: false,
62+
reading_link_url: false,
63+
reading_link_url_new_line: false,
64+
url_multiple_lines: false,
2965
start: 0_u32,
3066
};
3167
loader.scan_attrs(attrs);
3268
loader.spans_broken_link
3369
}
3470

3571
fn scan_attrs(&mut self, attrs: &[Attribute]) {
36-
for attr in attrs {
72+
for idx in 0..attrs.len() {
73+
let attr = &attrs[idx];
3774
if let AttrKind::DocComment(_com_kind, sym) = attr.kind
3875
&& let AttrStyle::Outer = attr.style
3976
{
4077
self.scan_line(sym.as_str(), attr.span);
78+
} else {
79+
if idx > 0 && self.active && self.processing_link_url {
80+
let prev_attr = &attrs[idx - 1];
81+
let prev_end_line = prev_attr.span.hi().0;
82+
self.record_broken_link(prev_end_line);
83+
}
84+
self.reset_lookup();
4185
}
4286
}
87+
88+
if self.active && self.processing_link_url {
89+
let last_end_line = attrs.last().unwrap().span.hi().0;
90+
self.record_broken_link(last_end_line);
91+
self.reset_lookup();
92+
}
4393
}
4494

45-
fn scan_line(&mut self, the_str: &str, attr_span: Span) {
95+
fn scan_line(&mut self, line: &str, attr_span: Span) {
4696
// Note that we specifically need the char _byte_ indices here, not the positional indexes
4797
// within the char array to deal with multi-byte characters properly. `char_indices` does
4898
// exactly that. It provides an iterator over tuples of the form `(byte position, char)`.
49-
let char_indices: Vec<_> = the_str.char_indices().collect();
99+
let char_indices: Vec<_> = line.char_indices().collect();
50100

51-
let mut no_url_curr_line = true;
101+
self.reading_link_url_new_line = self.reading_link_url;
52102

53103
for (pos, c) in char_indices {
104+
if pos == 0 && c.is_whitespace() {
105+
// ignore prefix whitespace on comments
106+
continue;
107+
}
108+
54109
if !self.active {
55110
if c == '[' {
56111
self.processing_link_text = true;
57112
self.active = true;
58-
self.start = attr_span.lo().0 + u32::try_from(pos).unwrap();
113+
// +3 skips the opening delimiter
114+
self.active_pos_start = attr_span.lo().0 + u32::try_from(pos).unwrap() + 3;
115+
self.active_span = Some(attr_span);
59116
}
60117
continue;
61118
}
@@ -73,37 +130,57 @@ impl BrokenLinkLoader {
73130
} else {
74131
// not a real link, start lookup over again
75132
self.reset_lookup();
76-
no_url_curr_line = true;
77133
}
78134
continue;
79135
}
80136

81137
if c == ')' {
138+
// record full broken link tag
139+
if self.url_multiple_lines {
140+
// +3 skips the opening delimiter and +1 to include the closing parethesis
141+
let pos_end = attr_span.lo().0 + u32::try_from(pos).unwrap() + 4;
142+
self.record_broken_link(pos_end);
143+
self.reset_lookup();
144+
}
82145
self.reset_lookup();
83-
no_url_curr_line = true;
84-
} else if no_url_curr_line && c != ' ' {
85-
no_url_curr_line = false;
146+
continue;
86147
}
87-
}
88-
89-
// If it got at the end of the line and it still processing a link part,
90-
// it means this is a broken link.
91-
if self.active && self.processing_link_url && !no_url_curr_line {
92-
let pos_end_line = u32::try_from(the_str.len()).unwrap() - 1;
93148

94-
// +3 skips the opening delimiter
95-
let start = BytePos(self.start + 3);
96-
let end = start + BytePos(pos_end_line);
149+
if self.reading_link_url && c.is_whitespace() {
150+
let pos_end = u32::try_from(pos).unwrap();
151+
self.record_broken_link(pos_end);
152+
self.reset_lookup();
153+
continue;
154+
}
97155

98-
let com_span = Span::new(start, end, attr_span.ctxt(), attr_span.parent());
156+
if !c.is_whitespace() {
157+
self.reading_link_url = true;
158+
}
99159

100-
self.spans_broken_link.push(com_span);
160+
if self.reading_link_url_new_line {
161+
self.url_multiple_lines = true;
162+
}
101163
}
102164
}
103165

104166
fn reset_lookup(&mut self) {
105-
self.processing_link_url = false;
106167
self.active = false;
107168
self.start = 0;
169+
self.processing_link_text = false;
170+
self.processing_link_url = false;
171+
self.reading_link_url = false;
172+
self.reading_link_url_new_line = false;
173+
self.url_multiple_lines = false;
174+
}
175+
176+
fn record_broken_link(&mut self, pos_end: u32) {
177+
if let Some(attr_span) = self.active_span {
178+
let start = BytePos(self.active_pos_start);
179+
let end = BytePos(pos_end);
180+
181+
let com_span = Span::new(start, end, attr_span.ctxt(), attr_span.parent());
182+
183+
self.spans_broken_link.push(com_span);
184+
}
108185
}
109186
}

tests/ui/doc_broken_link.rs

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,41 @@
11
#![warn(clippy::doc_broken_link)]
22

3-
fn main() {
4-
doc_valid_link();
5-
doc_valid_link_broken_title();
6-
doc_valid_link_broken_url_tag();
7-
doc_invalid_link_broken_url_scheme_part();
8-
doc_invalid_link_broken_url_host_part();
9-
}
3+
fn main() {}
4+
5+
pub struct FakeType {}
106

117
/// Test valid link, whole link single line.
128
/// [doc valid link](https://test.fake/doc_valid_link)
139
pub fn doc_valid_link() {}
1410

15-
/// Test valid link, title tag broken across multiple lines.
11+
/// Test valid link, whole link single line but it has special chars such as brackets and
12+
/// parenthesis. [doc invalid link url invalid char](https://test.fake/doc_valid_link_url_invalid_char?foo[bar]=1&bar(foo)=2)
13+
pub fn doc_valid_link_url_invalid_char() {}
14+
15+
/// Test valid link, text tag broken across multiple lines.
1616
/// [doc invalid link broken
17-
/// title](https://test.fake/doc_valid_link_broken_title)
18-
pub fn doc_valid_link_broken_title() {}
17+
/// text](https://test.fake/doc_valid_link_broken_text)
18+
pub fn doc_valid_link_broken_text() {}
19+
20+
/// Test valid link, url tag broken across multiple lines, but
21+
/// the whole url part in a single line.
22+
/// [doc valid link broken url tag two lines first](https://test.fake/doc_valid_link_broken_url_tag_two_lines_first
23+
/// )
24+
pub fn doc_valid_link_broken_url_tag_two_lines_first() {}
1925

2026
/// Test valid link, url tag broken across multiple lines, but
2127
/// the whole url part in a single line.
22-
/// [doc valid link broken url tag](
23-
/// https://test.fake/doc_valid_link_broken_url_tag)
24-
pub fn doc_valid_link_broken_url_tag() {}
28+
/// [doc valid link broken url tag two lines second](
29+
/// https://test.fake/doc_valid_link_broken_url_tag_two_lines_second)
30+
pub fn doc_valid_link_broken_url_tag_two_lines_second() {}
31+
32+
/// Test valid link, url tag broken across multiple lines, but
33+
/// the whole url part in a single line, but the closing pharentesis
34+
/// in a third line.
35+
/// [doc valid link broken url tag three lines](
36+
/// https://test.fake/doc_valid_link_broken_url_tag_three_lines
37+
/// )
38+
pub fn doc_valid_link_broken_url_tag_three_lines() {}
2539

2640
/// Test invalid link, url part broken across multiple lines.
2741
/// [doc invalid link broken url scheme part part](https://
@@ -35,14 +49,25 @@ pub fn doc_invalid_link_broken_url_scheme_part() {}
3549
//~^^ ERROR: possible broken doc link
3650
pub fn doc_invalid_link_broken_url_host_part() {}
3751

52+
/// Test invalid link, url part broken across multiple lines.
53+
/// [doc invalid link broken url host part](
54+
/// https://test.fake/doc_invalid_link_missing_close_parenthesis
55+
//~^^ ERROR: possible broken doc link
56+
pub fn doc_invalid_link_missing_close_parenthesis() {}
57+
58+
// NOTE: We don't test doc links where the url is broken accross
59+
// multiple lines in the path part because that is something
60+
// rustdoc itself will check and warn about it.
61+
pub fn doc_invalid_link_broken_url_path_part() {}
62+
3863
/// This might be considered a link false positive
3964
/// and should be ignored by this lint rule:
40-
/// Example of referencing some code with brackets [T].
65+
/// Example of referencing some code with brackets [FakeType].
4166
pub fn doc_ignore_link_false_positive_1() {}
4267

4368
/// This might be considered a link false positive
4469
/// and should be ignored by this lint rule:
45-
/// [`T`]. Continue text after brackets,
70+
/// [`FakeType`]. Continue text after brackets,
4671
/// then (something in
4772
/// parenthesis).
4873
pub fn doc_ignore_link_false_positive_2() {}

tests/ui/doc_broken_link.stderr

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,29 @@
11
error: possible broken doc link
2-
--> tests/ui/doc_broken_link.rs:27:5
2+
--> tests/ui/doc_broken_link.rs:41:5
33
|
4-
LL | /// [doc invalid link broken url scheme part part](https://
5-
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4+
LL | /// [doc invalid link broken url scheme part part](https://
5+
| _____^
6+
LL | | /// test.fake/doc_invalid_link_broken_url_scheme_part)
7+
| |______________________________________________________^
68
|
79
= note: `-D clippy::doc-broken-link` implied by `-D warnings`
810
= help: to override `-D warnings` add `#[allow(clippy::doc_broken_link)]`
911

1012
error: possible broken doc link
11-
--> tests/ui/doc_broken_link.rs:33:5
13+
--> tests/ui/doc_broken_link.rs:47:5
1214
|
13-
LL | /// [doc invalid link broken url host part](https://test
14-
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15+
LL | /// [doc invalid link broken url host part](https://test
16+
| _____^
17+
LL | | /// .fake/doc_invalid_link_broken_url_host_part)
18+
| |________________________________________________^
1519

16-
error: aborting due to 2 previous errors
20+
error: possible broken doc link
21+
--> tests/ui/doc_broken_link.rs:53:5
22+
|
23+
LL | /// [doc invalid link broken url host part](
24+
| _____^
25+
LL | | /// https://test.fake/doc_invalid_link_missing_close_parenthesis
26+
| |________________________________________________________________^
27+
28+
error: aborting due to 3 previous errors
1729

0 commit comments

Comments
 (0)