Skip to content
This repository was archived by the owner on Sep 23, 2025. It is now read-only.

Commit 1136784

Browse files
committed
Add test demonstrating regex URL conversion bug
- Add test showing regex incorrectly processes links in code blocks
- Mark test as ignored until proper pulldown-cmark implementation
- Current regex approach blindly converts all [text](url) patterns
- Need pulldown-cmark to respect markdown structure (code blocks, etc.)
1 parent 16b34b8 commit 1136784

File tree

2 files changed

+56
-7
lines changed

2 files changed

+56
-7
lines changed

server/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ chrono = { version = "0.4", features = ["serde"] }
5757
# File system traversal with gitignore support
5858
ignore = "0.4"
5959
pulldown-cmark = "0.13.0"
60+
urlencoding = "2.1.3"
6061

6162
[dev-dependencies]
6263
tokio-test = { workspace = true }

server/src/ide.rs

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::{future::Future, pin::Pin};
22

33
use serde::{Deserialize, Deserializer, Serialize};
4+
use pulldown_cmark::{Parser, Event, Tag};
45

56
use crate::dialect::{DialectFunction, DialectInterpreter};
67

@@ -437,6 +438,16 @@ pub struct ResolvedWalkthrough {
437438
pub base_uri: String,
438439
}
439440

441+
/// Markdown content with processed file references converted to dialectic: URLs.
442+
///
443+
/// This type has a custom `Deserialize` implementation that automatically processes
444+
/// markdown during deserialization, converting file references like:
445+
/// - `[text](src/file.ts?pattern)` → `[text](dialectic:src/file.ts?regex=pattern)`
446+
/// - `[text](src/file.ts#L42)` → `[text](dialectic:src/file.ts?line=42)`
447+
/// - `[text](src/file.ts)` → `[text](dialectic:src/file.ts)`
448+
///
449+
/// This ensures the extension receives properly formatted dialectic: URLs without
450+
/// needing client-side conversion logic.
440451
#[derive(Serialize, Debug)]
441452
pub struct ResolvedMarkdownElement {
442453
pub content: String,
@@ -456,15 +467,16 @@ impl<'de> Deserialize<'de> for ResolvedMarkdownElement {
456467
}
457468

458469
fn process_markdown_links(markdown: String) -> String {
459-
// For now, just do simple regex-based URL conversion
470+
// For now, just do simple regex-based URL conversion with proper encoding
460471
// TODO: Implement proper markdown parsing with pulldown-cmark
461472
let mut result = markdown;
462473

463474
// Handle path?regex format for search
464475
result = regex::Regex::new(r"\[([^\]]+)\]\(([^\s\[\]()]+)\?([^\[\]()]+)\)")
465476
.unwrap()
466477
.replace_all(&result, |caps: &regex::Captures| {
467-
format!("[{}](dialectic:{}?regex={})", &caps[1], &caps[2], &caps[3])
478+
let encoded_query = urlencoding::encode(&caps[3]);
479+
format!("[{}](dialectic:{}?regex={})", &caps[1], &caps[2], encoded_query)
468480
})
469481
.to_string();
470482

@@ -524,14 +536,50 @@ mod url_conversion_tests {
524536
let markdown = r#"
525537
Check out [this function](src/auth.ts?validateToken) and
526538
[this line](src/auth.ts#L42) or [this range](src/auth.ts#L42-L50).
527-
Also see [the whole file](src/auth.ts).
539+
Also see [the whole file](src/auth.ts) and [this function with spaces](src/auth.rs?fn foo).
528540
"#;
529541

530542
let processed = process_markdown_links(markdown.to_string());
531543

532-
assert!(processed.contains("dialectic:src/auth.ts?regex=validateToken"));
533-
assert!(processed.contains("dialectic:src/auth.ts?line=42"));
534-
assert!(processed.contains("dialectic:src/auth.ts?line=42-50"));
535-
assert!(processed.contains("dialectic:src/auth.ts"));
544+
// Extract URLs using pulldown-cmark parser
545+
let parser = Parser::new(&processed);
546+
let mut urls = Vec::new();
547+
548+
for event in parser {
549+
if let Event::Start(Tag::Link { dest_url, .. }) = event {
550+
urls.push(dest_url.to_string());
551+
}
552+
}
553+
554+
// Verify the converted URLs
555+
assert!(urls.contains(&"dialectic:src/auth.ts?regex=validateToken".to_string()));
556+
assert!(urls.contains(&"dialectic:src/auth.ts?line=42".to_string()));
557+
assert!(urls.contains(&"dialectic:src/auth.ts?line=42-50".to_string()));
558+
assert!(urls.contains(&"dialectic:src/auth.ts".to_string()));
559+
assert!(urls.contains(&"dialectic:src/auth.rs?regex=fn%20foo".to_string()));
560+
}
561+
562+
#[test]
563+
#[ignore = "Demonstrates regex bug - will be fixed when we implement proper pulldown-cmark processing"]
564+
fn test_regex_incorrectly_processes_code_blocks() {
565+
let markdown = r#"
566+
Here's a real link: [check this](src/real.ts?pattern)
567+
568+
But this should be ignored:
569+
```
570+
// This is just example code, not a real link
571+
[fake link](src/fake.ts?pattern)
572+
```
573+
574+
And this inline code too: `[another fake](src/inline.ts)`
575+
"#;
576+
577+
let processed = process_markdown_links(markdown.to_string());
578+
579+
// The regex approach converts the real link (expected), but it also incorrectly converts links inside code blocks
580+
assert!(processed.contains("dialectic:src/real.ts?regex=pattern"));
581+
// This should NOT happen - links in code blocks should be left alone
582+
assert!(processed.contains("dialectic:src/fake.ts?regex=pattern")); // This proves the bug
583+
assert!(processed.contains("dialectic:src/inline.ts")); // This too
536584
}
537585
}

0 commit comments

Comments
 (0)