Skip to content
This repository was archived by the owner on Sep 23, 2025. It is now read-only.

Commit 59689d9

Browse files
committed
Refactor malformed link processing with single-pass approach
- Combine the malformed-link and reference-link regexes into one regex with named captures
- Add a process_regex_matches helper to separate text manipulation from match handling
- A single pass now processes both link types in the correct order, fixing edge cases
- Add a test for mixed link types that would have broken with the old two-pass approach
- Much cleaner and more maintainable code structure
1 parent 389cd59 commit 59689d9

File tree

1 file changed

+54
-42
lines changed

1 file changed

+54
-42
lines changed

server/src/ide.rs

Lines changed: 54 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -546,26 +546,16 @@ fn process_malformed_links_in_events(events: Vec<Event>) -> Vec<Event> {
546546
fn process_malformed_links_in_text(text: &str, events: &mut Vec<Event>) {
547547
use pulldown_cmark::{Event, Tag, TagEnd, LinkType};
548548

549-
let mut remaining = text;
549+
// Combined regex with named captures
550+
let combined_regex = regex::Regex::new(
551+
r"(?P<malformed>\[(?P<malformed_text>[^\]]+)\]\((?P<malformed_url>[^)]*[ \{\[\(][^)]*)\))|(?P<reference>\[(?P<reference_text>[^\]]+)\]\[\])"
552+
).unwrap();
550553

551-
// Handle malformed links with problematic characters: spaces, {, [, (
552-
let malformed_regex = regex::Regex::new(r"\[([^\]]+)\]\(([^)]*[ \{\[\(][^)]*)\)").unwrap();
553-
554-
// Handle reference-style links: [foo.rs][] or [foo.rs:22][]
555-
let reference_regex = regex::Regex::new(r"\[([^\]]+)\]\[\]").unwrap();
556-
557-
let mut last_end = 0;
558-
559-
// Process malformed links first
560-
for m in malformed_regex.find_iter(text) {
561-
// Add text before the match
562-
if m.start() > last_end {
563-
events.push(Event::Text(text[last_end..m.start()].to_string().into()));
564-
}
565-
566-
if let Some(caps) = malformed_regex.captures(&text[m.start()..m.end()]) {
567-
let link_text = caps[1].to_string();
568-
let url = caps[2].to_string();
554+
process_regex_matches(text, &combined_regex, events, |caps, events| {
555+
if caps.name("malformed").is_some() {
556+
// Malformed link: [text](url with spaces)
557+
let link_text = caps.name("malformed_text").unwrap().as_str().to_string();
558+
let url = caps.name("malformed_url").unwrap().as_str().to_string();
569559

570560
// Generate proper link events
571561
events.push(Event::Start(Tag::Link {
@@ -576,24 +566,10 @@ fn process_malformed_links_in_text(text: &str, events: &mut Vec<Event>) {
576566
}));
577567
events.push(Event::Text(link_text.into()));
578568
events.push(Event::End(TagEnd::Link));
579-
}
580-
581-
last_end = m.end();
582-
}
583-
584-
// Update remaining text
585-
remaining = &text[last_end..];
586-
last_end = 0;
587-
588-
// Process reference-style links in remaining text
589-
for m in reference_regex.find_iter(remaining) {
590-
// Add text before the match
591-
if m.start() > last_end {
592-
events.push(Event::Text(remaining[last_end..m.start()].to_string().into()));
593-
}
594-
595-
if let Some(caps) = reference_regex.captures(&remaining[m.start()..m.end()]) {
596-
let link_text = caps[1].to_string();
569+
570+
} else if caps.name("reference").is_some() {
571+
// Reference link: [text][]
572+
let link_text = caps.name("reference_text").unwrap().as_str().to_string();
597573

598574
// Determine URL based on pattern
599575
let url = if let Some(line_caps) = regex::Regex::new(r"^([^:]+\.[a-z]+):(\d+)$").unwrap().captures(&link_text) {
@@ -604,9 +580,8 @@ fn process_malformed_links_in_text(text: &str, events: &mut Vec<Event>) {
604580
format!("dialectic:{}", link_text)
605581
} else {
606582
// For other reference links, leave as-is for now
607-
events.push(Event::Text(remaining[m.start()..m.end()].to_string().into()));
608-
last_end = m.end();
609-
continue;
583+
events.push(Event::Text(format!("[{}][]", link_text).into()));
584+
return;
610585
};
611586

612587
// Generate proper link events
@@ -619,13 +594,35 @@ fn process_malformed_links_in_text(text: &str, events: &mut Vec<Event>) {
619594
events.push(Event::Text(link_text.into()));
620595
events.push(Event::End(TagEnd::Link));
621596
}
597+
});
598+
}
599+
600+
fn process_regex_matches<F>(
601+
text: &str,
602+
regex: &regex::Regex,
603+
events: &mut Vec<Event>,
604+
mut handle_match: F,
605+
) where
606+
F: FnMut(&regex::Captures, &mut Vec<Event>),
607+
{
608+
let mut last_end = 0;
609+
610+
for m in regex.find_iter(text) {
611+
// Add text before the match
612+
if m.start() > last_end {
613+
events.push(Event::Text(text[last_end..m.start()].to_string().into()));
614+
}
615+
616+
if let Some(caps) = regex.captures(&text[m.start()..m.end()]) {
617+
handle_match(&caps, events);
618+
}
622619

623620
last_end = m.end();
624621
}
625622

626623
// Add any remaining text
627-
if last_end < remaining.len() {
628-
events.push(Event::Text(remaining[last_end..].to_string().into()));
624+
if last_end < text.len() {
625+
events.push(Event::Text(text[last_end..].to_string().into()));
629626
}
630627
}
631628

@@ -747,4 +744,19 @@ Also [main.rs][] and [utils.ts:42][].
747744
]
748745
"#]]);
749746
}
747+
748+
#[test]
749+
fn test_mixed_link_types_in_single_text() {
750+
let markdown = r#"
751+
Check [foo.rs][], [foo](foo.rs?a b), [bar.rs][].
752+
"#;
753+
754+
check_extracted_urls(markdown, expect![[r#"
755+
[
756+
"dialectic:foo.rs",
757+
"dialectic:foo.rs?regex=a%20b",
758+
"dialectic:bar.rs",
759+
]
760+
"#]]);
761+
}
750762
}

0 commit comments

Comments
 (0)