11use clippy_utils:: diagnostics:: span_lint;
22use pulldown_cmark:: BrokenLink as PullDownBrokenLink ;
3- use rustc_ast:: { AttrKind , AttrStyle , Attribute } ;
43use rustc_lint:: LateContext ;
5- use rustc_resolve:: rustdoc:: DocFragment ;
6- use rustc_span:: { BytePos , Span } ;
4+ use rustc_resolve:: rustdoc:: { DocFragment , source_span_for_markdown_range } ;
5+ use rustc_span:: { BytePos , Pos , Span } ;
76
87use super :: DOC_BROKEN_LINK ;
98
10- pub fn check ( cx : & LateContext < ' _ > , attrs : & [ Attribute ] ) {
11- BrokenLinkReporter :: warn_if_broken_links ( cx, attrs) ;
12- }
13-
14- // NOTE: temporary change to check if we can handle broken links from pulldown_cmark parser.
15- pub fn check_v2 ( _cx : & LateContext < ' _ > , bl : & PullDownBrokenLink < ' _ > , doc : & String , fragments : & Vec < DocFragment > ) {
16- log ( format ! ( "\n ---------------------" , ) . as_str ( ) ) ;
17- log ( format ! ( "\n doc={doc:#?}" , ) . as_str ( ) ) ;
18- log ( format ! ( "\n fragments={fragments:#?}" , ) . as_str ( ) ) ;
19-
20- log ( format ! ( "\n bl={bl:#?}" , ) . as_str ( ) ) ;
21-
22- let text: String = doc[ bl. span . clone ( ) ] . chars ( ) . collect ( ) ;
23- log ( format ! ( "\n text based on 'bl.span' range={text:#?}" , ) . as_str ( ) ) ;
24- log ( format ! ( "\n ---------------------" , ) . as_str ( ) ) ;
9+ /// Scan and report broken link on documents.
10+ /// It ignores false positives detected by pulldown_cmark, and only
11+ /// warns users when the broken link is consider a URL.
12+ pub fn check ( cx : & LateContext < ' _ > , bl : & PullDownBrokenLink < ' _ > , doc : & String , fragments : & Vec < DocFragment > ) {
13+ warn_if_broken_link ( cx, bl, doc, fragments) ;
2514}
2615
2716/// The reason why a link is considered broken.
@@ -34,152 +23,90 @@ enum BrokenLinkReason {
3423 MultipleLines ,
3524}
3625
37- /// Broken link data.
38- struct BrokenLink {
39- reason : BrokenLinkReason ,
40- span : Span ,
41- }
42-
26+ #[ derive( Debug ) ]
4327enum State {
4428 ProcessingLinkText ,
4529 ProcessedLinkText ,
4630 ProcessingLinkUrl ( UrlState ) ,
4731}
4832
/// What the link scanner has learned about the URL part of a link tag.
#[derive(Debug)]
enum UrlState {
    /// No non-whitespace URL char has been seen yet.
    Empty,
    /// URL chars have been seen, and none of them came after a line break.
    FilledEntireSingleLine,
    /// A URL char was found after a line break: the URL spans several lines.
    FilledBrokenMultipleLines,
}
5439
55- /// Scan AST attributes looking up in doc comments for broken links
56- /// which rustdoc won't be able to properly create link tags later,
57- /// and warn about those failures.
58- struct BrokenLinkReporter {
59- state : Option < State > ,
60-
61- /// Keep track of the span for the processing broken link.
62- active_span : Option < Span > ,
63-
64- /// Keep track where exactly the link definition has started in the code.
65- active_pos_start : u32 ,
66- }
67-
68- impl BrokenLinkReporter {
69- fn warn_if_broken_links ( cx : & LateContext < ' _ > , attrs : & [ Attribute ] ) {
70- let mut reporter = BrokenLinkReporter {
71- state : None ,
72- active_pos_start : 0 ,
73- active_span : None ,
74- } ;
75-
76- for attr in attrs {
77- if let AttrKind :: DocComment ( _com_kind, sym) = attr. kind
78- && let AttrStyle :: Outer = attr. style
79- {
80- reporter. scan_line ( cx, sym. as_str ( ) , attr. span ) ;
81- }
82- }
83- }
84-
85- fn scan_line ( & mut self , cx : & LateContext < ' _ > , line : & str , attr_span : Span ) {
86- let reading_link_url_new_line = matches ! (
87- self . state,
88- Some ( State :: ProcessingLinkUrl ( UrlState :: FilledEntireSingleLine ) )
89- ) ;
40+ fn warn_if_broken_link ( cx : & LateContext < ' _ > , bl : & PullDownBrokenLink < ' _ > , doc : & String , fragments : & Vec < DocFragment > ) {
41+ if let Some ( span) = source_span_for_markdown_range ( cx. tcx , doc, & bl. span , fragments) {
42+ // `PullDownBrokenLink` provided by pulldown_cmark always
43+ // start with `[` which makes pulldown_cmark consider this a link tag.
44+ let mut state = State :: ProcessingLinkText ;
9045
91- for ( pos, c) in line. char_indices ( ) {
92- if pos == 0 && c. is_whitespace ( ) {
93- // ignore prefix whitespace on comments
94- continue ;
95- }
46+ // Whether it was detected a line break within the link tag url part.
47+ let mut reading_link_url_new_line = false ;
9648
97- match & self . state {
98- None => {
99- if c == '[' {
100- self . state = Some ( State :: ProcessingLinkText ) ;
101- // +3 skips the opening delimiter
102- self . active_pos_start = attr_span. lo ( ) . 0 + u32:: try_from ( pos) . unwrap ( ) + 3 ;
103- self . active_span = Some ( attr_span) ;
104- }
105- } ,
106- Some ( State :: ProcessingLinkText ) => {
49+ // Skip the first char because we already know it is a `[` char.
50+ for ( abs_pos, c) in doc. char_indices ( ) . skip ( bl. span . start + 1 ) {
51+ match & state {
52+ State :: ProcessingLinkText => {
10753 if c == ']' {
108- self . state = Some ( State :: ProcessedLinkText ) ;
54+ state = State :: ProcessedLinkText ;
10955 }
11056 } ,
111- Some ( State :: ProcessedLinkText ) => {
57+ State :: ProcessedLinkText => {
11258 if c == '(' {
113- self . state = Some ( State :: ProcessingLinkUrl ( UrlState :: Empty ) ) ;
59+ state = State :: ProcessingLinkUrl ( UrlState :: Empty ) ;
11460 } else {
115- // not a real link, start lookup over again
116- self . reset_lookup ( ) ;
61+ // not a real link, just skip it without reporting a broken link for it.
62+ break ;
11763 }
11864 } ,
119- Some ( State :: ProcessingLinkUrl ( url_state) ) => {
65+ State :: ProcessingLinkUrl ( url_state) => {
66+ if c == '\n' {
67+ reading_link_url_new_line = true ;
68+ continue ;
69+ }
70+
12071 if c == ')' {
12172 // record full broken link tag
12273 if let UrlState :: FilledBrokenMultipleLines = url_state {
123- // +3 skips the opening delimiter and +1 to include the closing parethesis
124- let pos_end = attr_span. lo ( ) . 0 + u32:: try_from ( pos) . unwrap ( ) + 4 ;
125- self . record_broken_link ( cx, pos_end, BrokenLinkReason :: MultipleLines ) ;
126- self . reset_lookup ( ) ;
74+ let offset = abs_pos - bl. span . start ;
75+ report_broken_link ( cx, span, offset, BrokenLinkReason :: MultipleLines ) ;
12776 }
128- self . reset_lookup ( ) ;
129- continue ;
77+ break ;
13078 }
13179
13280 if !c. is_whitespace ( ) {
13381 if reading_link_url_new_line {
13482 // It was reading a link url which was entirely in a single line, but a new char
13583 // was found in this new line which turned the url into a broken state.
136- self . state = Some ( State :: ProcessingLinkUrl ( UrlState :: FilledBrokenMultipleLines ) ) ;
84+ state = State :: ProcessingLinkUrl ( UrlState :: FilledBrokenMultipleLines ) ;
13785 continue ;
13886 }
13987
140- self . state = Some ( State :: ProcessingLinkUrl ( UrlState :: FilledEntireSingleLine ) ) ;
88+ state = State :: ProcessingLinkUrl ( UrlState :: FilledEntireSingleLine ) ;
14189 }
14290 } ,
14391 } ;
14492 }
14593 }
146-
147- fn reset_lookup ( & mut self ) {
148- self . state = None ;
149- self . active_span = None ;
150- self . active_pos_start = 0 ;
151- }
152-
153- fn record_broken_link ( & mut self , cx : & LateContext < ' _ > , pos_end : u32 , reason : BrokenLinkReason ) {
154- if let Some ( attr_span) = self . active_span {
155- let start = BytePos ( self . active_pos_start ) ;
156- let end = BytePos ( pos_end) ;
157-
158- let span = Span :: new ( start, end, attr_span. ctxt ( ) , attr_span. parent ( ) ) ;
159-
160- let reason_msg = match reason {
161- BrokenLinkReason :: MultipleLines => "broken across multiple lines" ,
162- } ;
163-
164- span_lint (
165- cx,
166- DOC_BROKEN_LINK ,
167- span,
168- format ! ( "possible broken doc link: {reason_msg}" ) ,
169- ) ;
170- }
171- }
17294}
17395
174- // TODO: remove this helper function once all changes are good.
175- fn log ( text : & str ) {
176- use std:: fs:: OpenOptions ;
177- use std:: io:: Write ;
96+ fn report_broken_link ( cx : & LateContext < ' _ > , frag_span : Span , offset : usize , reason : BrokenLinkReason ) {
97+ let start = frag_span. lo ( ) ;
98+ let end = start + BytePos :: from_usize ( offset + 5 ) ;
17899
179- let filename = "../rust-clippy-debug-test.txt" ;
180- let mut file = OpenOptions :: new ( ) . write ( true ) . append ( true ) . open ( filename) . unwrap ( ) ;
100+ let span = Span :: new ( start, end, frag_span. ctxt ( ) , frag_span. parent ( ) ) ;
181101
182- if let Err ( e) = writeln ! ( file, "{text}" ) {
183- eprintln ! ( "Couldn't write to file: {}" , e) ;
184- }
102+ let reason_msg = match reason {
103+ BrokenLinkReason :: MultipleLines => "broken across multiple lines" ,
104+ } ;
105+
106+ span_lint (
107+ cx,
108+ DOC_BROKEN_LINK ,
109+ span,
110+ format ! ( "possible broken doc link: {reason_msg}" ) ,
111+ ) ;
185112}
0 commit comments