Skip to content
This repository was archived by the owner on Sep 23, 2025. It is now read-only.

Commit 61bc77f

Browse files
committed
Refactor to Event-based malformed link processing
- Replace string manipulation with direct Event generation
- process_malformed_links_in_text now emits proper Start(Link)/Text/End(Link) sequences
- Reference-style links generate dialectic: URLs directly to avoid double-processing
- Cleaner separation: coalesce → process malformed → convert remaining links
- All tests passing with more efficient approach
1 parent d8ad877 commit 61bc77f

File tree

1 file changed

+89
-46
lines changed

1 file changed

+89
-46
lines changed

server/src/ide.rs

Lines changed: 89 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -472,20 +472,23 @@ fn process_markdown_links(markdown: String) -> String {
472472
let parser = Parser::new(&markdown);
473473
let mut events: Vec<Event> = parser.collect();
474474

475-
// Pass 1: Convert well-formed Link events
475+
// Pass 1: Coalesce adjacent Text events first
476+
events = coalesce_text_events(events);
477+
478+
// Pass 2: Process malformed links in Text events
479+
events = process_malformed_links_in_events(events);
480+
481+
// Pass 3: Convert well-formed Link events (but skip ones already processed)
476482
for event in &mut events {
477483
if let Event::Start(Tag::Link { dest_url, .. }) = event {
478-
let converted_url = convert_url_to_dialectic(dest_url);
479-
*dest_url = converted_url.into();
484+
// Only convert if it doesn't already start with dialectic:
485+
if !dest_url.starts_with("dialectic:") {
486+
let converted_url = convert_url_to_dialectic(dest_url);
487+
*dest_url = converted_url.into();
488+
}
480489
}
481490
}
482491

483-
// Pass 2: Coalesce adjacent Text events
484-
events = coalesce_text_events(events);
485-
486-
// Pass 3: Process malformed links in Text events
487-
events = process_malformed_links_in_events(events);
488-
489492
// Convert events back to markdown
490493
let mut output = String::new();
491494
pulldown_cmark_to_cmark::cmark(events.into_iter(), &mut output).unwrap();
@@ -529,13 +532,7 @@ fn process_malformed_links_in_events(events: Vec<Event>) -> Vec<Event> {
529532
for event in events {
530533
match event {
531534
Event::Text(text) => {
532-
let processed_text = process_malformed_links_in_text(&text);
533-
if processed_text != text.as_ref() {
534-
// Text was modified, create a new owned Text event
535-
result.push(Event::Text(processed_text.into()));
536-
} else {
537-
result.push(Event::Text(text));
538-
}
535+
process_malformed_links_in_text(&text, &mut result);
539536
}
540537
_ => {
541538
result.push(event);
@@ -546,45 +543,90 @@ fn process_malformed_links_in_events(events: Vec<Event>) -> Vec<Event> {
546543
result
547544
}
548545

549-
fn process_malformed_links_in_text(text: &str) -> String {
550-
let mut result = text.to_string();
546+
fn process_malformed_links_in_text(text: &str, events: &mut Vec<Event>) {
547+
use pulldown_cmark::{Event, Tag, TagEnd, LinkType};
548+
549+
let mut remaining = text;
551550

552551
// Handle malformed links with problematic characters: spaces, {, [, (
553-
// Pattern: [text](url with spaces or {[( characters)
554-
result = regex::Regex::new(r"\[([^\]]+)\]\(([^)]*[ \{\[\(][^)]*)\)")
555-
.unwrap()
556-
.replace_all(&result, |caps: &regex::Captures| {
557-
let link_text = &caps[1];
558-
let url = &caps[2];
559-
let converted_url = convert_url_to_dialectic(url);
560-
format!("[{}]({})", link_text, converted_url)
561-
})
562-
.to_string();
552+
let malformed_regex = regex::Regex::new(r"\[([^\]]+)\]\(([^)]*[ \{\[\(][^)]*)\)").unwrap();
563553

564554
// Handle reference-style links: [foo.rs][] or [foo.rs:22][]
565-
result = regex::Regex::new(r"\[([^\]]+)\]\[\]")
566-
.unwrap()
567-
.replace_all(&result, |caps: &regex::Captures| {
568-
let link_text = &caps[1];
555+
let reference_regex = regex::Regex::new(r"\[([^\]]+)\]\[\]").unwrap();
556+
557+
let mut last_end = 0;
558+
559+
// Process malformed links first
560+
for m in malformed_regex.find_iter(text) {
561+
// Add text before the match
562+
if m.start() > last_end {
563+
events.push(Event::Text(text[last_end..m.start()].to_string().into()));
564+
}
565+
566+
if let Some(caps) = malformed_regex.captures(&text[m.start()..m.end()]) {
567+
let link_text = caps[1].to_string();
568+
let url = caps[2].to_string();
569569

570-
// Handle [filename.ext:line][] format
571-
if let Some(line_caps) = regex::Regex::new(r"^([^:]+\.[a-z]+):(\d+)$").unwrap().captures(link_text) {
570+
// Generate proper link events
571+
events.push(Event::Start(Tag::Link {
572+
link_type: LinkType::Inline,
573+
dest_url: url.into(),
574+
title: "".into(),
575+
id: "".into()
576+
}));
577+
events.push(Event::Text(link_text.into()));
578+
events.push(Event::End(TagEnd::Link));
579+
}
580+
581+
last_end = m.end();
582+
}
583+
584+
// Update remaining text
585+
remaining = &text[last_end..];
586+
last_end = 0;
587+
588+
// Process reference-style links in remaining text
589+
for m in reference_regex.find_iter(remaining) {
590+
// Add text before the match
591+
if m.start() > last_end {
592+
events.push(Event::Text(remaining[last_end..m.start()].to_string().into()));
593+
}
594+
595+
if let Some(caps) = reference_regex.captures(&remaining[m.start()..m.end()]) {
596+
let link_text = caps[1].to_string();
597+
598+
// Determine URL based on pattern
599+
let url = if let Some(line_caps) = regex::Regex::new(r"^([^:]+\.[a-z]+):(\d+)$").unwrap().captures(&link_text) {
572600
let filename = &line_caps[1];
573601
let line_num = &line_caps[2];
574-
return format!("[{}](dialectic:{}#L{})", link_text, filename, line_num);
575-
}
576-
577-
// Handle [filename.ext][] format
578-
if regex::Regex::new(r"^[^:]+\.[a-z]+$").unwrap().is_match(link_text) {
579-
return format!("[{}](dialectic:{})", link_text, link_text);
580-
}
602+
format!("dialectic:{}#L{}", filename, line_num)
603+
} else if regex::Regex::new(r"^[^:]+\.[a-z]+$").unwrap().is_match(&link_text) {
604+
format!("dialectic:{}", link_text)
605+
} else {
606+
// For other reference links, leave as-is for now
607+
events.push(Event::Text(remaining[m.start()..m.end()].to_string().into()));
608+
last_end = m.end();
609+
continue;
610+
};
581611

582-
// For other reference links, leave unchanged for now
583-
format!("[{}][]", link_text)
584-
})
585-
.to_string();
612+
// Generate proper link events
613+
events.push(Event::Start(Tag::Link {
614+
link_type: LinkType::Inline,
615+
dest_url: url.into(),
616+
title: "".into(),
617+
id: "".into()
618+
}));
619+
events.push(Event::Text(link_text.into()));
620+
events.push(Event::End(TagEnd::Link));
621+
}
622+
623+
last_end = m.end();
624+
}
586625

587-
result
626+
// Add any remaining text
627+
if last_end < remaining.len() {
628+
events.push(Event::Text(remaining[last_end..].to_string().into()));
629+
}
588630
}
589631

590632
fn convert_url_to_dialectic(url: &str) -> String {
@@ -630,6 +672,7 @@ pub enum ResolvedWalkthroughElement {
630672
#[cfg(test)]
631673
mod url_conversion_tests {
632674
use super::*;
675+
use pulldown_cmark::{Parser, Event, Tag};
633676

634677
#[test]
635678
fn test_markdown_url_conversion() {

0 commit comments

Comments
 (0)