11use std:: { future:: Future , pin:: Pin } ;
22
33use serde:: { Deserialize , Deserializer , Serialize } ;
4+ use pulldown_cmark:: { Parser , Event , Tag } ;
45
56use crate :: dialect:: { DialectFunction , DialectInterpreter } ;
67
@@ -437,6 +438,16 @@ pub struct ResolvedWalkthrough {
437438 pub base_uri : String ,
438439}
439440
441+ /// Markdown content with processed file references converted to dialectic: URLs.
442+ ///
443+ /// This type has a custom `Deserialize` implementation that automatically processes
444+ /// markdown during deserialization, converting file references like:
445+ /// - `[text](src/file.ts?pattern)` → `[text](dialectic:src/file.ts?regex=pattern)` (the pattern is URL-encoded)
446+ /// - `[text](src/file.ts#L42)` → `[text](dialectic:src/file.ts?line=42)`
447+ /// - `[text](src/file.ts)` → `[text](dialectic:src/file.ts)`
448+ ///
449+ /// This ensures the extension receives properly formatted dialectic: URLs without
450+ /// needing client-side conversion logic.
440451#[ derive( Serialize , Debug ) ]
441452pub struct ResolvedMarkdownElement {
442453 pub content : String ,
@@ -456,15 +467,16 @@ impl<'de> Deserialize<'de> for ResolvedMarkdownElement {
456467}
457468
458469fn process_markdown_links ( markdown : String ) -> String {
459- // For now, just do simple regex-based URL conversion
470+ // For now, just do simple regex-based URL conversion with proper encoding
460471 // TODO: Implement proper markdown parsing with pulldown-cmark
461472 let mut result = markdown;
462473
463474 // Handle path?regex format for search
464475 result = regex:: Regex :: new ( r"\[([^\]]+)\]\(([^\s\[\]()]+)\?([^\[\]()]+)\)" )
465476 . unwrap ( )
466477 . replace_all ( & result, |caps : & regex:: Captures | {
467- format ! ( "[{}](dialectic:{}?regex={})" , & caps[ 1 ] , & caps[ 2 ] , & caps[ 3 ] )
478+ let encoded_query = urlencoding:: encode ( & caps[ 3 ] ) ;
479+ format ! ( "[{}](dialectic:{}?regex={})" , & caps[ 1 ] , & caps[ 2 ] , encoded_query)
468480 } )
469481 . to_string ( ) ;
470482
@@ -524,14 +536,50 @@ mod url_conversion_tests {
524536 let markdown = r#"
525537Check out [this function](src/auth.ts?validateToken) and
526538[this line](src/auth.ts#L42) or [this range](src/auth.ts#L42-L50).
527- Also see [the whole file](src/auth.ts).
539+ Also see [the whole file](src/auth.ts) and [this function with spaces](src/auth.rs?fn foo) .
528540"# ;
529541
530542 let processed = process_markdown_links ( markdown. to_string ( ) ) ;
531543
532- assert ! ( processed. contains( "dialectic:src/auth.ts?regex=validateToken" ) ) ;
533- assert ! ( processed. contains( "dialectic:src/auth.ts?line=42" ) ) ;
534- assert ! ( processed. contains( "dialectic:src/auth.ts?line=42-50" ) ) ;
535- assert ! ( processed. contains( "dialectic:src/auth.ts" ) ) ;
544+ // Extract URLs using pulldown-cmark parser
545+ let parser = Parser :: new ( & processed) ;
546+ let mut urls = Vec :: new ( ) ;
547+
548+ for event in parser {
549+ if let Event :: Start ( Tag :: Link { dest_url, .. } ) = event {
550+ urls. push ( dest_url. to_string ( ) ) ;
551+ }
552+ }
553+
554+ // Verify the converted URLs
555+ assert ! ( urls. contains( & "dialectic:src/auth.ts?regex=validateToken" . to_string( ) ) ) ;
556+ assert ! ( urls. contains( & "dialectic:src/auth.ts?line=42" . to_string( ) ) ) ;
557+ assert ! ( urls. contains( & "dialectic:src/auth.ts?line=42-50" . to_string( ) ) ) ;
558+ assert ! ( urls. contains( & "dialectic:src/auth.ts" . to_string( ) ) ) ;
559+ assert ! ( urls. contains( & "dialectic:src/auth.rs?regex=fn%20foo" . to_string( ) ) ) ;
560+ }
561+
/// Ignored marker test for a known limitation of `process_markdown_links`.
///
/// The fixture holds one genuine link plus link-shaped text inside a fenced
/// code block and inside an inline code span. The assertions below pin the
/// current output, in which all three are expected to be rewritten to
/// `dialectic:` URLs.
///
/// NOTE(review): presumably the last two assertions must be inverted once the
/// processing becomes code-aware — confirm when the pulldown-cmark rewrite lands.
562+ #[ test]
563+ #[ ignore = "Demonstrates regex bug - will be fixed when we implement proper pulldown-cmark processing" ]
564+ fn test_regex_incorrectly_processes_code_blocks ( ) {
565+ let markdown = r#"
566+ Here's a real link: [check this](src/real.ts?pattern)
567+
568+ But this should be ignored:
569+ ```
570+ // This is just example code, not a real link
571+ [fake link](src/fake.ts?pattern)
572+ ```
573+
574+ And this inline code too: `[another fake](src/inline.ts)`
575+ "# ;
576+
577+ let processed = process_markdown_links ( markdown. to_string ( ) ) ;
578+
579+ // Sanity check: the genuine link outside any code region is converted
580+ assert ! ( processed. contains( "dialectic:src/real.ts?regex=pattern" ) ) ;
581+ // The regex approach also converts links inside code blocks. This should NOT happen - they should be left alone
582+ assert ! ( processed. contains( "dialectic:src/fake.ts?regex=pattern" ) ) ; // This proves the bug (fenced code block)
583+ assert ! ( processed. contains( "dialectic:src/inline.ts" ) ) ; // This too (inline code span)
536584 }
537585}
0 commit comments