@@ -546,26 +546,16 @@ fn process_malformed_links_in_events(events: Vec<Event>) -> Vec<Event> {
546546fn process_malformed_links_in_text ( text : & str , events : & mut Vec < Event > ) {
547547 use pulldown_cmark:: { Event , Tag , TagEnd , LinkType } ;
548548
549- let mut remaining = text;
549+ // Combined regex with named captures
550+ let combined_regex = regex:: Regex :: new (
551+ r"(?P<malformed>\[(?P<malformed_text>[^\]]+)\]\((?P<malformed_url>[^)]*[ \{\[\(][^)]*)\))|(?P<reference>\[(?P<reference_text>[^\]]+)\]\[\])"
552+ ) . unwrap ( ) ;
550553
551- // Handle malformed links with problematic characters: spaces, {, [, (
552- let malformed_regex = regex:: Regex :: new ( r"\[([^\]]+)\]\(([^)]*[ \{\[\(][^)]*)\)" ) . unwrap ( ) ;
553-
554- // Handle reference-style links: [foo.rs][] or [foo.rs:22][]
555- let reference_regex = regex:: Regex :: new ( r"\[([^\]]+)\]\[\]" ) . unwrap ( ) ;
556-
557- let mut last_end = 0 ;
558-
559- // Process malformed links first
560- for m in malformed_regex. find_iter ( text) {
561- // Add text before the match
562- if m. start ( ) > last_end {
563- events. push ( Event :: Text ( text[ last_end..m. start ( ) ] . to_string ( ) . into ( ) ) ) ;
564- }
565-
566- if let Some ( caps) = malformed_regex. captures ( & text[ m. start ( ) ..m. end ( ) ] ) {
567- let link_text = caps[ 1 ] . to_string ( ) ;
568- let url = caps[ 2 ] . to_string ( ) ;
554+ process_regex_matches ( text, & combined_regex, events, |caps, events| {
555+ if caps. name ( "malformed" ) . is_some ( ) {
556+ // Malformed link: [text](url with spaces)
557+ let link_text = caps. name ( "malformed_text" ) . unwrap ( ) . as_str ( ) . to_string ( ) ;
558+ let url = caps. name ( "malformed_url" ) . unwrap ( ) . as_str ( ) . to_string ( ) ;
569559
570560 // Generate proper link events
571561 events. push ( Event :: Start ( Tag :: Link {
@@ -576,24 +566,10 @@ fn process_malformed_links_in_text(text: &str, events: &mut Vec<Event>) {
576566 } ) ) ;
577567 events. push ( Event :: Text ( link_text. into ( ) ) ) ;
578568 events. push ( Event :: End ( TagEnd :: Link ) ) ;
579- }
580-
581- last_end = m. end ( ) ;
582- }
583-
584- // Update remaining text
585- remaining = & text[ last_end..] ;
586- last_end = 0 ;
587-
588- // Process reference-style links in remaining text
589- for m in reference_regex. find_iter ( remaining) {
590- // Add text before the match
591- if m. start ( ) > last_end {
592- events. push ( Event :: Text ( remaining[ last_end..m. start ( ) ] . to_string ( ) . into ( ) ) ) ;
593- }
594-
595- if let Some ( caps) = reference_regex. captures ( & remaining[ m. start ( ) ..m. end ( ) ] ) {
596- let link_text = caps[ 1 ] . to_string ( ) ;
569+
570+ } else if caps. name ( "reference" ) . is_some ( ) {
571+ // Reference link: [text][]
572+ let link_text = caps. name ( "reference_text" ) . unwrap ( ) . as_str ( ) . to_string ( ) ;
597573
598574 // Determine URL based on pattern
599575 let url = if let Some ( line_caps) = regex:: Regex :: new ( r"^([^:]+\.[a-z]+):(\d+)$" ) . unwrap ( ) . captures ( & link_text) {
@@ -604,9 +580,8 @@ fn process_malformed_links_in_text(text: &str, events: &mut Vec<Event>) {
604580 format ! ( "dialectic:{}" , link_text)
605581 } else {
606582 // For other reference links, leave as-is for now
607- events. push ( Event :: Text ( remaining[ m. start ( ) ..m. end ( ) ] . to_string ( ) . into ( ) ) ) ;
608- last_end = m. end ( ) ;
609- continue ;
583+ events. push ( Event :: Text ( format ! ( "[{}][]" , link_text) . into ( ) ) ) ;
584+ return ;
610585 } ;
611586
612587 // Generate proper link events
@@ -619,13 +594,35 @@ fn process_malformed_links_in_text(text: &str, events: &mut Vec<Event>) {
619594 events. push ( Event :: Text ( link_text. into ( ) ) ) ;
620595 events. push ( Event :: End ( TagEnd :: Link ) ) ;
621596 }
597+ } ) ;
598+ }
599+
600+ fn process_regex_matches < F > (
601+ text : & str ,
602+ regex : & regex:: Regex ,
603+ events : & mut Vec < Event > ,
604+ mut handle_match : F ,
605+ ) where
606+ F : FnMut ( & regex:: Captures , & mut Vec < Event > ) ,
607+ {
608+ let mut last_end = 0 ;
609+
610+ for m in regex. find_iter ( text) {
611+ // Add text before the match
612+ if m. start ( ) > last_end {
613+ events. push ( Event :: Text ( text[ last_end..m. start ( ) ] . to_string ( ) . into ( ) ) ) ;
614+ }
615+
616+ if let Some ( caps) = regex. captures ( & text[ m. start ( ) ..m. end ( ) ] ) {
617+ handle_match ( & caps, events) ;
618+ }
622619
623620 last_end = m. end ( ) ;
624621 }
625622
626623 // Add any remaining text
627- if last_end < remaining . len ( ) {
628- events. push ( Event :: Text ( remaining [ last_end..] . to_string ( ) . into ( ) ) ) ;
624+ if last_end < text . len ( ) {
625+ events. push ( Event :: Text ( text [ last_end..] . to_string ( ) . into ( ) ) ) ;
629626 }
630627}
631628
@@ -747,4 +744,19 @@ Also [main.rs][] and [utils.ts:42][].
747744 ]
748745 "# ] ] ) ;
749746 }
747+
// Exercises a single line of text that mixes both link forms: a reference-style
// link (`[foo.rs][]`), a malformed inline link whose URL contains a space
// (`[foo](foo.rs?a b)`), and a second reference-style link (`[bar.rs][]`).
// The snapshot expects three URLs, listed in the same order as the links
// appear in the text.
// NOTE(review): `check_extracted_urls` is defined outside this view; presumably
// it extracts link URLs from the markdown and compares them to the snapshot.
748+ #[ test]
749+ fn test_mixed_link_types_in_single_text ( ) {
750+ let markdown = r#"
751+ Check [foo.rs][], [foo](foo.rs?a b), [bar.rs][].
752+ "# ;
753+
754+ check_extracted_urls ( markdown, expect ! [ [ r#"
755+ [
756+ "dialectic:foo.rs",
757+ "dialectic:foo.rs?regex=a%20b",
758+ "dialectic:bar.rs",
759+ ]
760+ "# ] ] ) ;
761+ }
750762}
0 commit comments