Skip to content

Commit 083aadd

Browse files
committed
Handle broken links as part of the fake_broken_link_callback handler
1 parent f29cc5d commit 083aadd

File tree

4 files changed

+138
-129
lines changed

4 files changed

+138
-129
lines changed
Lines changed: 50 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,16 @@
11
use clippy_utils::diagnostics::span_lint;
22
use pulldown_cmark::BrokenLink as PullDownBrokenLink;
3-
use rustc_ast::{AttrKind, AttrStyle, Attribute};
43
use rustc_lint::LateContext;
5-
use rustc_resolve::rustdoc::DocFragment;
6-
use rustc_span::{BytePos, Span};
4+
use rustc_resolve::rustdoc::{DocFragment, source_span_for_markdown_range};
5+
use rustc_span::{BytePos, Pos, Span};
76

87
use super::DOC_BROKEN_LINK;
98

10-
pub fn check(cx: &LateContext<'_>, attrs: &[Attribute]) {
11-
BrokenLinkReporter::warn_if_broken_links(cx, attrs);
12-
}
13-
14-
// NOTE: temporary change to check if we can handle broken links from pulldown_cmark parser.
15-
pub fn check_v2(_cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &String, fragments: &Vec<DocFragment>) {
16-
log(format!("\n ---------------------",).as_str());
17-
log(format!("\n doc={doc:#?}",).as_str());
18-
log(format!("\n fragments={fragments:#?}",).as_str());
19-
20-
log(format!("\n bl={bl:#?}",).as_str());
21-
22-
let text: String = doc[bl.span.clone()].chars().collect();
23-
log(format!("\n text based on 'bl.span' range={text:#?}",).as_str());
24-
log(format!("\n ---------------------",).as_str());
9+
/// Scan and report broken link on documents.
10+
/// It ignores false positives detected by pulldown_cmark, and only
11+
/// warns users when the broken link is consider a URL.
12+
pub fn check(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &String, fragments: &Vec<DocFragment>) {
13+
warn_if_broken_link(cx, bl, doc, fragments);
2514
}
2615

2716
/// The reason why a link is considered broken.
@@ -34,152 +23,90 @@ enum BrokenLinkReason {
3423
MultipleLines,
3524
}
3625

37-
/// Broken link data.
38-
struct BrokenLink {
39-
reason: BrokenLinkReason,
40-
span: Span,
41-
}
42-
26+
#[derive(Debug)]
4327
enum State {
4428
ProcessingLinkText,
4529
ProcessedLinkText,
4630
ProcessingLinkUrl(UrlState),
4731
}
4832

33+
#[derive(Debug)]
4934
enum UrlState {
5035
Empty,
5136
FilledEntireSingleLine,
5237
FilledBrokenMultipleLines,
5338
}
5439

55-
/// Scan AST attributes looking up in doc comments for broken links
56-
/// which rustdoc won't be able to properly create link tags later,
57-
/// and warn about those failures.
58-
struct BrokenLinkReporter {
59-
state: Option<State>,
60-
61-
/// Keep track of the span for the processing broken link.
62-
active_span: Option<Span>,
63-
64-
/// Keep track where exactly the link definition has started in the code.
65-
active_pos_start: u32,
66-
}
67-
68-
impl BrokenLinkReporter {
69-
fn warn_if_broken_links(cx: &LateContext<'_>, attrs: &[Attribute]) {
70-
let mut reporter = BrokenLinkReporter {
71-
state: None,
72-
active_pos_start: 0,
73-
active_span: None,
74-
};
75-
76-
for attr in attrs {
77-
if let AttrKind::DocComment(_com_kind, sym) = attr.kind
78-
&& let AttrStyle::Outer = attr.style
79-
{
80-
reporter.scan_line(cx, sym.as_str(), attr.span);
81-
}
82-
}
83-
}
84-
85-
fn scan_line(&mut self, cx: &LateContext<'_>, line: &str, attr_span: Span) {
86-
let reading_link_url_new_line = matches!(
87-
self.state,
88-
Some(State::ProcessingLinkUrl(UrlState::FilledEntireSingleLine))
89-
);
40+
fn warn_if_broken_link(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &String, fragments: &Vec<DocFragment>) {
41+
if let Some(span) = source_span_for_markdown_range(cx.tcx, doc, &bl.span, fragments) {
42+
// `PullDownBrokenLink` provided by pulldown_cmark always
43+
// start with `[` which makes pulldown_cmark consider this a link tag.
44+
let mut state = State::ProcessingLinkText;
9045

91-
for (pos, c) in line.char_indices() {
92-
if pos == 0 && c.is_whitespace() {
93-
// ignore prefix whitespace on comments
94-
continue;
95-
}
46+
// Whether it was detected a line break within the link tag url part.
47+
let mut reading_link_url_new_line = false;
9648

97-
match &self.state {
98-
None => {
99-
if c == '[' {
100-
self.state = Some(State::ProcessingLinkText);
101-
// +3 skips the opening delimiter
102-
self.active_pos_start = attr_span.lo().0 + u32::try_from(pos).unwrap() + 3;
103-
self.active_span = Some(attr_span);
104-
}
105-
},
106-
Some(State::ProcessingLinkText) => {
49+
// Skip the first char because we already know it is a `[` char.
50+
for (abs_pos, c) in doc.char_indices().skip(bl.span.start + 1) {
51+
match &state {
52+
State::ProcessingLinkText => {
10753
if c == ']' {
108-
self.state = Some(State::ProcessedLinkText);
54+
state = State::ProcessedLinkText;
10955
}
11056
},
111-
Some(State::ProcessedLinkText) => {
57+
State::ProcessedLinkText => {
11258
if c == '(' {
113-
self.state = Some(State::ProcessingLinkUrl(UrlState::Empty));
59+
state = State::ProcessingLinkUrl(UrlState::Empty);
11460
} else {
115-
// not a real link, start lookup over again
116-
self.reset_lookup();
61+
// not a real link, just skip it without reporting a broken link for it.
62+
break;
11763
}
11864
},
119-
Some(State::ProcessingLinkUrl(url_state)) => {
65+
State::ProcessingLinkUrl(url_state) => {
66+
if c == '\n' {
67+
reading_link_url_new_line = true;
68+
continue;
69+
}
70+
12071
if c == ')' {
12172
// record full broken link tag
12273
if let UrlState::FilledBrokenMultipleLines = url_state {
123-
// +3 skips the opening delimiter and +1 to include the closing parethesis
124-
let pos_end = attr_span.lo().0 + u32::try_from(pos).unwrap() + 4;
125-
self.record_broken_link(cx, pos_end, BrokenLinkReason::MultipleLines);
126-
self.reset_lookup();
74+
let offset = abs_pos - bl.span.start;
75+
report_broken_link(cx, span, offset, BrokenLinkReason::MultipleLines);
12776
}
128-
self.reset_lookup();
129-
continue;
77+
break;
13078
}
13179

13280
if !c.is_whitespace() {
13381
if reading_link_url_new_line {
13482
// It was reading a link url which was entirely in a single line, but a new char
13583
// was found in this new line which turned the url into a broken state.
136-
self.state = Some(State::ProcessingLinkUrl(UrlState::FilledBrokenMultipleLines));
84+
state = State::ProcessingLinkUrl(UrlState::FilledBrokenMultipleLines);
13785
continue;
13886
}
13987

140-
self.state = Some(State::ProcessingLinkUrl(UrlState::FilledEntireSingleLine));
88+
state = State::ProcessingLinkUrl(UrlState::FilledEntireSingleLine);
14189
}
14290
},
14391
};
14492
}
14593
}
146-
147-
fn reset_lookup(&mut self) {
148-
self.state = None;
149-
self.active_span = None;
150-
self.active_pos_start = 0;
151-
}
152-
153-
fn record_broken_link(&mut self, cx: &LateContext<'_>, pos_end: u32, reason: BrokenLinkReason) {
154-
if let Some(attr_span) = self.active_span {
155-
let start = BytePos(self.active_pos_start);
156-
let end = BytePos(pos_end);
157-
158-
let span = Span::new(start, end, attr_span.ctxt(), attr_span.parent());
159-
160-
let reason_msg = match reason {
161-
BrokenLinkReason::MultipleLines => "broken across multiple lines",
162-
};
163-
164-
span_lint(
165-
cx,
166-
DOC_BROKEN_LINK,
167-
span,
168-
format!("possible broken doc link: {reason_msg}"),
169-
);
170-
}
171-
}
17294
}
17395

174-
// TODO: remove this helper function once all changes are good.
175-
fn log(text: &str) {
176-
use std::fs::OpenOptions;
177-
use std::io::Write;
96+
fn report_broken_link(cx: &LateContext<'_>, frag_span: Span, offset: usize, reason: BrokenLinkReason) {
97+
let start = frag_span.lo();
98+
let end = start + BytePos::from_usize(offset + 5);
17899

179-
let filename = "../rust-clippy-debug-test.txt";
180-
let mut file = OpenOptions::new().write(true).append(true).open(filename).unwrap();
100+
let span = Span::new(start, end, frag_span.ctxt(), frag_span.parent());
181101

182-
if let Err(e) = writeln!(file, "{text}") {
183-
eprintln!("Couldn't write to file: {}", e);
184-
}
102+
let reason_msg = match reason {
103+
BrokenLinkReason::MultipleLines => "broken across multiple lines",
104+
};
105+
106+
span_lint(
107+
cx,
108+
DOC_BROKEN_LINK,
109+
span,
110+
format!("possible broken doc link: {reason_msg}"),
111+
);
185112
}

clippy_lints/src/doc/mod.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -815,17 +815,12 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[
815815
return Some(DocHeaders::default());
816816
}
817817

818-
// // Run broken link checker before parsing the document.
819-
// broken_link::check(cx, attrs);
820-
821818
// We don't want the parser to choke on intra doc links. Since we don't
822819
// actually care about rendering them, just pretend that all broken links
823820
// point to a fake address.
824821
#[expect(clippy::unnecessary_wraps)] // we're following a type signature
825822
let fake_broken_link_callback = |bl: BrokenLink<'_>| -> Option<(CowStr<'_>, CowStr<'_>)> {
826-
// NOTE: temporary change to check if we can handle broken links report from
827-
// this function.
828-
broken_link::check_v2(cx, &bl, &doc, &fragments);
823+
broken_link::check(cx, &bl, &doc, &fragments);
829824
Some(("fake".into(), "fake".into()))
830825
};
831826

tests/ui/doc_broken_link.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,16 @@ pub fn doc_invalid_link_broken_url_scheme_part() {}
4949
//~^^ ERROR: possible broken doc link: broken across multiple lines
5050
pub fn doc_invalid_link_broken_url_host_part() {}
5151

52+
/// Test invalid link, for multiple urls in the same block of comment.
53+
/// There is a [fist link - invalid](https://test
54+
/// .fake) then it continues
55+
//~^^ ERROR: possible broken doc link: broken across multiple lines
56+
/// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test
57+
/// .fake). It ends with another
58+
//~^^ ERROR: possible broken doc link: broken across multiple lines
59+
/// line of comment.
60+
pub fn doc_multiple_invalid_link_broken_url() {}
61+
5262
/// This might be considered a link false positive
5363
/// and should be ignored by this lint rule:
5464
/// Example of referencing some code with brackets [FakeType].

tests/ui/doc_broken_link.stderr

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
error: possible broken doc link: broken across multiple lines
2+
--> tests/ui/doc_broken_link.rs:41:5
3+
|
4+
LL | /// [doc invalid link broken url scheme part part](https://
5+
| _____^
6+
LL | | /// test.fake/doc_invalid_link_broken_url_scheme_part)
7+
| |______________________________________________________^
8+
|
9+
= note: `-D clippy::doc-broken-link` implied by `-D warnings`
10+
= help: to override `-D warnings` add `#[allow(clippy::doc_broken_link)]`
11+
12+
error: possible broken doc link: broken across multiple lines
13+
--> tests/ui/doc_broken_link.rs:41:5
14+
|
15+
LL | /// [doc invalid link broken url scheme part part](https://
16+
| _____^
17+
LL | | /// test.fake/doc_invalid_link_broken_url_scheme_part)
18+
| |______________________________________________________^
19+
|
20+
= note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no`
21+
22+
error: possible broken doc link: broken across multiple lines
23+
--> tests/ui/doc_broken_link.rs:47:5
24+
|
25+
LL | /// [doc invalid link broken url host part](https://test
26+
| _____^
27+
LL | | /// .fake/doc_invalid_link_broken_url_host_part)
28+
| |________________________________________________^
29+
30+
error: possible broken doc link: broken across multiple lines
31+
--> tests/ui/doc_broken_link.rs:47:5
32+
|
33+
LL | /// [doc invalid link broken url host part](https://test
34+
| _____^
35+
LL | | /// .fake/doc_invalid_link_broken_url_host_part)
36+
| |________________________________________________^
37+
|
38+
= note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no`
39+
40+
error: possible broken doc link: broken across multiple lines
41+
--> tests/ui/doc_broken_link.rs:53:16
42+
|
43+
LL | /// There is a [fist link - invalid](https://test
44+
| ________________^
45+
LL | | /// .fake) then it continues
46+
| |__________^
47+
48+
error: possible broken doc link: broken across multiple lines
49+
--> tests/ui/doc_broken_link.rs:56:80
50+
|
51+
LL | /// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test
52+
| ________________________________________________________________________________^
53+
LL | | /// .fake). It ends with another
54+
| |__________^
55+
56+
error: possible broken doc link: broken across multiple lines
57+
--> tests/ui/doc_broken_link.rs:53:16
58+
|
59+
LL | /// There is a [fist link - invalid](https://test
60+
| ________________^
61+
LL | | /// .fake) then it continues
62+
| |__________^
63+
|
64+
= note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no`
65+
66+
error: possible broken doc link: broken across multiple lines
67+
--> tests/ui/doc_broken_link.rs:56:80
68+
|
69+
LL | /// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test
70+
| ________________________________________________________________________________^
71+
LL | | /// .fake). It ends with another
72+
| |__________^
73+
|
74+
= note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no`
75+
76+
error: aborting due to 8 previous errors
77+

0 commit comments

Comments
 (0)