@@ -17,6 +17,7 @@ use crate::{
1717 inline_preprocessor:: InlinePreprocessorParserState ,
1818 inline_processing:: {
1919 adjust_and_log_parse_error, parse_inlines, preprocess_inline_content, process_inlines,
20+ process_inlines_no_autolinks,
2021 } ,
2122 location_mapping:: map_inline_locations,
2223 manpage:: {
@@ -3555,9 +3556,12 @@ peg::parser! {
35553556 }
35563557
35573558 pub ( crate ) rule inlines( offset: usize , block_metadata: & BlockParsingMetadata ) -> Vec <InlineNode > // Parses one or more inline nodes with autolink recognition enabled.
3558- = ( non_plain_text( offset, block_metadata) / plain_text( offset, block_metadata) ) +
3559+ = ( non_plain_text( offset, block_metadata, true ) / plain_text( offset, block_metadata, true ) ) + // `true` is the `allow_autolinks` argument; `non_plain_text` is tried before `plain_text` at each position.
35593560
3560- rule non_plain_text( offset: usize , block_metadata: & BlockParsingMetadata ) -> InlineNode
3561+ pub ( crate ) rule inlines_no_autolinks( offset: usize , block_metadata: & BlockParsingMetadata ) -> Vec <InlineNode > // Same shape as `inlines`, but with autolink recognition switched off.
3562+ = ( non_plain_text( offset, block_metadata, false ) / plain_text( offset, block_metadata, false ) ) + // `false` makes `check_autolinks` fail, so the `inline_autolink` alternative is never taken and `plain_text` no longer stops at URL-like text.
3563+
3564+ rule non_plain_text( offset: usize , block_metadata: & BlockParsingMetadata , allow_autolinks: bool ) -> InlineNode
35613565 = inline: (
35623566 // Escaped superscript/subscript must come first - produces RawText to prevent re-parsing
35633567 escaped_super_sub: escaped_superscript_subscript( offset) { escaped_super_sub }
@@ -3587,7 +3591,7 @@ peg::parser! {
35873591 / url_macro: url_macro( offset, block_metadata) { url_macro }
35883592 / pass: inline_pass( offset) { pass }
35893593 / link_macro: link_macro( offset) { link_macro }
3590- / inline_autolink: inline_autolink( offset) { inline_autolink }
3594+ / check_autolinks ( allow_autolinks ) inline_autolink: inline_autolink( offset) { inline_autolink }
35913595 / inline_line_break: inline_line_break( offset) { inline_line_break }
35923596 / bold_text_unconstrained: bold_text_unconstrained( offset, block_metadata) { bold_text_unconstrained }
35933597 / bold_text_constrained: bold_text_constrained( offset, block_metadata) { bold_text_constrained }
@@ -4000,7 +4004,7 @@ peg::parser! {
40004004 }
40014005 }
40024006 let text = if let Some ( text) = text {
4003- process_inlines ( state, block_metadata, & start, end, offset, & text)
4007+ process_inlines_no_autolinks ( state, block_metadata, & start, end, offset, & text)
40044008 . map_err( |e| {
40054009 tracing:: error!( ?e, url_text = text, "could not process URL macro text" ) ;
40064010 "could not process URL macro text"
@@ -4048,7 +4052,7 @@ peg::parser! {
40484052 }
40494053 }
40504054 let text = if let Some ( text) = text {
4051- process_inlines ( state, block_metadata, & start, end, offset, & text)
4055+ process_inlines_no_autolinks ( state, block_metadata, & start, end, offset, & text)
40524056 . map_err( |e| {
40534057 tracing:: error!( ?e, url_text = text, "could not process mailto macro text" ) ;
40544058 "could not process mailto macro text"
@@ -4065,12 +4069,16 @@ peg::parser! {
40654069 } ) ) )
40664070 }
40674071
4072+ rule check_autolinks( allow: bool ) -> ( ) // Zero-width guard: matches no input and only decides whether the alternative it prefixes may proceed.
4073+ = { ? if allow { Ok ( ( ) ) } else { Err ( "autolinks suppressed" ) } } // Returning `Err` from the `{? }` action makes this rule fail, so the parser moves on to the next alternative.
4074+
40684075 rule inline_autolink( offset: usize ) -> InlineNode
4069- = start: position!( )
4076+ =
4077+ start: position!( )
40704078 url_info: (
40714079 "<" url: url( ) ">" { ( url, true ) }
40724080 / "<" url: email_address( ) ">" { ( format!( "mailto:{url}" ) , true ) }
4073- / url: url ( ) { ( url, false ) }
4081+ / url: bare_url ( ) { ( url, false ) }
40744082 / url: email_address( ) { ( format!( "mailto:{url}" ) , false ) }
40754083 )
40764084 end: position!( )
@@ -4299,7 +4307,7 @@ peg::parser! {
42994307 if trimmed. is_empty( ) {
43004308 vec![ ]
43014309 } else {
4302- process_inlines ( state, block_metadata, & start, end, offset, trimmed)
4310+ process_inlines_no_autolinks ( state, block_metadata, & start, end, offset, trimmed)
43034311 . map_err( |e| {
43044312 tracing:: error!( ?e, xref_text = trimmed, "could not process xref text" ) ;
43054313 "could not process xref text"
@@ -4331,7 +4339,7 @@ peg::parser! {
43314339 let text = if raw_text. is_empty( ) {
43324340 vec![ ]
43334341 } else {
4334- process_inlines ( state, block_metadata, & start, end, offset, raw_text)
4342+ process_inlines_no_autolinks ( state, block_metadata, & start, end, offset, raw_text)
43354343 . map_err( |e| {
43364344 tracing:: error!( ?e, xref_text = raw_text, "could not process xref text" ) ;
43374345 "could not process xref text"
@@ -4860,14 +4868,14 @@ peg::parser! {
48604868 } ) )
48614869 }
48624870
4863- rule plain_text( offset: usize , block_metadata: & BlockParsingMetadata ) -> InlineNode
4871+ rule plain_text( offset: usize , block_metadata: & BlockParsingMetadata , allow_autolinks : bool ) -> InlineNode
48644872 = start_pos: position!( )
48654873 content: $( (
48664874 // Escape sequences for superscript/subscript markers - only when NOT followed by
48674875 // a complete pattern (those are handled by escaped_superscript_subscript rule)
48684876 "\\ " "^" !( [ ^'^' | ' ' | '\t' | '\n' ] + "^" )
48694877 / "\\ " "~" !( [ ^'~' | ' ' | '\t' | '\n' ] + "~" )
4870- / ( !( eol( ) * <2 , > / ![ _] / escaped_syntax_match( ) / index_term_match( ) / inline_anchor_match( ) / cross_reference_shorthand_match( ) / cross_reference_macro_match( ) / hard_wrap( offset) / footnote_match( offset, block_metadata) / inline_image( start_pos, block_metadata) / inline_icon( start_pos, block_metadata) / inline_stem( start_pos) / inline_keyboard( start_pos) / inline_button( start_pos) / inline_menu( start_pos) / mailto_macro( start_pos, block_metadata) / url_macro( start_pos, block_metadata) / inline_pass( start_pos) / link_macro( start_pos) / inline_autolink( start_pos) / inline_line_break( start_pos) / bold_text_unconstrained( start_pos, block_metadata) / bold_text_constrained_match( ) / italic_text_unconstrained( start_pos, block_metadata) / italic_text_constrained_match( ) / monospace_text_unconstrained( start_pos, block_metadata) / monospace_text_constrained_match( ) / highlight_text_unconstrained( start_pos, block_metadata) / highlight_text_constrained_match( ) / superscript_text( start_pos, block_metadata) / subscript_text( start_pos, block_metadata) / curved_quotation_text( start_pos, block_metadata) / curved_apostrophe_text( start_pos, block_metadata) / standalone_curved_apostrophe( start_pos, block_metadata) ) [ _] )
4878+ / ( !( eol( ) * <2 , > / ![ _] / escaped_syntax_match( ) / index_term_match( ) / inline_anchor_match( ) / cross_reference_shorthand_match( ) / cross_reference_macro_match( ) / hard_wrap( offset) / footnote_match( offset, block_metadata) / inline_image( start_pos, block_metadata) / inline_icon( start_pos, block_metadata) / inline_stem( start_pos) / inline_keyboard( start_pos) / inline_button( start_pos) / inline_menu( start_pos) / mailto_macro( start_pos, block_metadata) / url_macro( start_pos, block_metadata) / inline_pass( start_pos) / link_macro( start_pos) / ( check_autolinks( allow_autolinks) inline_autolink( start_pos) ) / inline_line_break( start_pos) / bold_text_unconstrained( start_pos, block_metadata) / bold_text_constrained_match( ) / italic_text_unconstrained( start_pos, block_metadata) / italic_text_constrained_match( ) / monospace_text_unconstrained( start_pos, block_metadata) / monospace_text_constrained_match( ) / highlight_text_unconstrained( start_pos, block_metadata) / highlight_text_constrained_match( ) / superscript_text( start_pos, block_metadata) / subscript_text( start_pos, block_metadata) / curved_quotation_text( start_pos, block_metadata) / curved_apostrophe_text( start_pos, block_metadata) / standalone_curved_apostrophe( start_pos, block_metadata) ) [ _] )
48714879 ) +)
48724880 end: position!( )
48734881 {
@@ -5590,6 +5598,63 @@ peg::parser! {
55905598 Ok ( strip_url_backslash_escapes( & processed. text) )
55915599 }
55925600
5601+ /// URL for bare autolinks: one of the schemes `https`, `http`, `ftp` or `irc`, the literal `://`,
5602+ /// and a path matched by `bare_url_path`, which decides where the URL ends (punctuation, parentheses).
5603+ rule bare_url( ) -> String =
5604+ proto: $( "https" / "http" / "ftp" / "irc" ) "://" path: bare_url_path( ) // "https" is listed before "http": ordered choice would otherwise commit to "http" and then fail on "://".
5605+ { format!( "{proto}://{path}" ) } // Reassemble the scheme with the already-processed path returned by `bare_url_path`.
5606+
5607+ /// URL path for bare autolinks. Like url_path() but:
5608+ /// - A run of punctuation (. , ; ! ? : ' *) is consumed only when the whole run is followed by a safe char or `(`, so sentence endings such as `...`, `?!` or `'.` stay outside the URL.
5609+ /// - `)` only consumed as part of a balanced `(...)` group, preventing capture of
5610+ /// sentence-level parens like `(see http://example.com)`.
5611+ rule bare_url_path( ) -> String = path: $(
5612+ bare_url_safe_char( ) // The path must start with a safe char.
5613+ ( bare_url_safe_char( )
5614+ / bare_url_paren_group( ) // Tried before the lone "(" so a balanced group is preferred.
5615+ / "(" // An opening paren that does not start a balanced group is still accepted.
5616+ / bare_url_trailing_char( ) + & bare_url_char( ) // `+` takes the whole punctuation run before the lookahead; checking one char at a time let `?` in `?!` through because `!` itself satisfies `bare_url_char`.
5617+ ) *
5618+ )
5619+ { ?
5620+ let inline_state = InlinePreprocessorParserState :: new( // Fresh preprocessor state scoped to the matched path text.
5621+ path,
5622+ state. line_map. clone( ) ,
5623+ & state. input,
5624+ ) ;
5625+ let processed = inline_preprocessing:: run( path, & state. document_attributes, & inline_state) // Run the inline preprocessor over the path with the document attributes.
5626+ . map_err( |e| { // Log the underlying error, then surface a static message as the `{? }` failure.
5627+ tracing:: error!( ?e, "could not preprocess bare url path" ) ;
5628+ "could not preprocess bare url path"
5629+ } ) ?;
5630+ for warning in inline_state. drain_warnings( ) { // Forward warnings collected during preprocessing to the parser state.
5631+ state. add_warning( warning) ;
5632+ }
5633+ Ok ( strip_url_backslash_escapes( & processed. text) ) // The returned path is the preprocessed text passed through `strip_url_backslash_escapes`.
5634+ }
5635+
5636+ /// Balanced parenthesized group in a URL path.
5637+ /// Handles nested parens: `http://example.com/wiki/Foo_(bar_(baz))`
5638+ /// This is the only rule that consumes `)`, so an unbalanced `)` is never captured.
5639+ rule bare_url_paren_group( )
5640+ = "(" ( bare_url_safe_char( ) / bare_url_trailing_char( ) / bare_url_paren_group( ) / "(" ) * ")" // Inside a group punctuation is consumed without lookahead; the recursive alternative handles nesting and is tried before a lone "(".
5641+
5642+ /// URL chars that are safe to end a bare URL — they won't be confused with sentence punctuation.
5643+ /// Excludes `(` and `)`: `)` is matched only by `bare_url_paren_group`, and `(` by that rule or by the literal "(" alternatives next to it.
5644+ rule bare_url_safe_char( ) = [ 'A' ..='Z' | 'a' ..='z' | '0' ..='9' | '-' | '_' | '~'
5645+ | '/' | '#' | '@' | '$' | '&'
5646+ | '+' | '=' | '%' | '\\' ] // Backslash is accepted here; `bare_url_path` passes its result through `strip_url_backslash_escapes`.
5647+
5648+ /// Punctuation that is valid mid-URL but should not end a bare URL.
5649+ /// Excludes `)`, which is only consumed via a balanced `bare_url_paren_group`.
5650+ rule bare_url_trailing_char( ) = [ '.' | ',' | ';' | '!' | '?' | ':' | '\'' | '*' ] // `bare_url_path` guards this with a lookahead; `bare_url_paren_group` consumes it unconditionally.
5651+
5652+ /// Any char that may continue a URL path; used as the positive lookahead after `bare_url_trailing_char` in `bare_url_path`.
5653+ /// Includes `(` because it can start a paren group.
5654+ /// Excludes `)` so that trailing chars before `)` aren't greedily consumed
5655+ /// (e.g., `http://example.com.)` keeps both `.` and `)` outside).
5656+ rule bare_url_char( ) = bare_url_safe_char( ) / bare_url_trailing_char( ) / "(" // Union of the two character rules plus a literal "(".
5657+
55935658 /// Filesystem path - conservative character set for cross-platform compatibility
55945659 /// Includes '{' and '}' for `AsciiDoc` attribute substitution
55955660 pub rule path( ) -> String = path: $( [ 'A' ..='Z' | 'a' ..='z' | '0' ..='9' | '{' | '}' | '_' | '-' | '.' | '/' | '\\' ] +)
0 commit comments