Skip to content

Commit b9c4dc1

Browse files
committed
fix(rostra-bot): escaping of special characters
1 parent 5ef9480 commit b9c4dc1

File tree

3 files changed

+220
-5
lines changed

3 files changed

+220
-5
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/bots/rostra-bot/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,7 @@ tokio = { workspace = true, features = ["time"] }
3838
tracing = { workspace = true }
3939
tracing-subscriber = { workspace = true }
4040
url = { workspace = true }
41+
42+
[dev-dependencies]
43+
jotup = { workspace = true }
44+
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }

crates/bots/rostra-bot/src/publisher.rs

Lines changed: 215 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,21 @@ use tracing::{info, warn};
99

1010
use crate::tables::Article;
1111

12+
/// Percent-encode the characters of a URL that conflict with the
/// Djot/Markdown inline link syntax `[text](url)`.
///
/// * `(` and `)` become `%28` / `%29`, so a parenthesis inside the URL can
///   never be mistaken for the end of the link destination.
/// * `\` becomes `%5C`, because Djot treats a backslash followed by ASCII
///   punctuation as an escape sequence: a raw backslash would be stripped
///   from the destination, and one at the very end of the URL would escape
///   the closing `)` of the link itself.
///
/// Every other character is copied through unchanged.
fn escape_url_for_djot(url: &str) -> String {
    // Single pass with one allocation, instead of one intermediate `String`
    // per chained `replace` call.
    let mut escaped = String::with_capacity(url.len());
    for ch in url.chars() {
        match ch {
            '(' => escaped.push_str("%28"),
            ')' => escaped.push_str("%29"),
            '\\' => escaped.push_str("%5C"),
            other => escaped.push(other),
        }
    }
    escaped
}
17+
18+
/// Escape text that will be placed inside link brackets `[text]`.
///
/// A backslash is inserted in front of every character that could otherwise
/// terminate or corrupt the surrounding link:
///
/// * `\` - so an existing backslash stays literal instead of escaping
///   whatever follows it;
/// * `[` and `]` - so djot does not interpret them as nested or prematurely
///   closed link text;
/// * `` ` `` - a backtick opens a verbatim span, and an unmatched one would
///   swallow the closing `]` of the link.
///
/// All other characters are copied through unchanged.
fn escape_link_text(text: &str) -> String {
    // One pass over the input: each character is examined exactly once, so
    // the result does not depend on the order in which replacements run.
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if matches!(ch, '\\' | '[' | ']' | '`') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
26+
1227
#[derive(Debug, Snafu)]
1328
pub enum PublisherError {
1429
#[snafu(display("Failed to post to Rostra: {source}"))]
@@ -41,7 +56,9 @@ impl Publisher {
4156
// Atom feed format: title on top, then "by {author} from {feed_title}
4257
// ({subtitle})"
4358
if let Some(ref url) = article.url {
44-
post.push_str(&format!("##### [{}]({})\n\n", article.title, url));
59+
let escaped_title = escape_link_text(&article.title);
60+
let escaped_url = escape_url_for_djot(url);
61+
post.push_str(&format!("##### [{escaped_title}]({escaped_url})\n\n"));
4562
} else {
4663
post.push_str(&format!("##### {}\n\n", article.title));
4764
}
@@ -57,7 +74,9 @@ impl Publisher {
5774
if let Some(ref feed_title) = article.feed_title {
5875
post.push_str(" from ");
5976
if let Some(ref feed_link) = article.feed_link {
60-
post.push_str(&format!("[{feed_title}]({feed_link})"));
77+
let escaped_feed_title = escape_link_text(feed_title);
78+
let escaped_link = escape_url_for_djot(feed_link);
79+
post.push_str(&format!("[{escaped_feed_title}]({escaped_link})"));
6180
} else {
6281
post.push_str(feed_title);
6382
}
@@ -69,7 +88,9 @@ impl Publisher {
6988
} else {
7089
// HN/Lobsters format: title with comments link
7190
if let Some(ref url) = article.url {
72-
post.push_str(&format!("##### [{}]({})\n\n", article.title, url));
91+
let escaped_title = escape_link_text(&article.title);
92+
let escaped_url = escape_url_for_djot(url);
93+
post.push_str(&format!("##### [{escaped_title}]({escaped_url})\n\n"));
7394
} else {
7495
post.push_str(&format!("##### {}\n\n", article.title));
7596
}
@@ -79,9 +100,9 @@ impl Publisher {
79100
"lobsters" => "Lobsters",
80101
_ => &article.source,
81102
};
103+
let escaped_source_url = escape_url_for_djot(&article.source_url);
82104
post.push_str(&format!(
83-
"* [💬 {} Comments]({})\n",
84-
source_name, article.source_url
105+
"* [💬 {source_name} Comments]({escaped_source_url})\n"
85106
));
86107
}
87108

@@ -144,3 +165,192 @@ impl Publisher {
144165
results
145166
}
146167
}
168+
169+
#[cfg(test)]
mod tests {
    use jotup::r#async::AsyncRenderOutputExt;

    use super::*;

    /// Renders a djot snippet to an HTML string with jotup's tokio renderer.
    async fn render_djot(content: &str) -> String {
        let rendered = jotup::html::tokio::Renderer::default()
            .render_into_document(content)
            .await
            .expect("Rendering failed");
        let bytes = rendered.into_inner();
        String::from_utf8(bytes).expect("valid utf8")
    }

    /// Parentheses are percent-encoded; everything else is left alone.
    #[test]
    fn test_escape_url_for_djot() {
        let cases = [
            ("https://example.com/path", "https://example.com/path"),
            (
                "https://example.com/S1550-4131(26)00008-2",
                "https://example.com/S1550-4131%2826%2900008-2",
            ),
            (
                "https://example.com/nested((parens))",
                "https://example.com/nested%28%28parens%29%29",
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(escape_url_for_djot(input), expected);
        }
    }

    /// Brackets and backslashes gain a leading backslash; plain text is
    /// unchanged.
    #[test]
    fn test_escape_link_text() {
        let cases = [
            ("Normal title", "Normal title"),
            ("Title with [brackets]", "Title with \\[brackets\\]"),
            ("Title with ] only", "Title with \\] only"),
            ("Title with [ only", "Title with \\[ only"),
            ("Title with \\ backslash", "Title with \\\\ backslash"),
        ];
        for (input, expected) in cases {
            assert_eq!(escape_link_text(input), expected);
        }
    }

    #[tokio::test]
    async fn test_url_with_parentheses_renders_correctly() {
        // The exact URL pattern that originally produced a broken link.
        let link_target = escape_url_for_djot(
            "https://www.cell.com/cell-metabolism/abstract/S1550-4131(26)00008-2",
        );
        let djot = format!("##### [Semaglutide improves knee osteoarthritis]({link_target})");

        let html = render_djot(&djot).await;

        // The href must carry the whole, percent-encoded URL.
        let expected_href =
            "href=\"https://www.cell.com/cell-metabolism/abstract/S1550-4131%2826%2900008-2\"";
        assert!(
            html.contains(expected_href),
            "URL should have escaped parentheses. Got: {html}"
        );
        assert!(
            html.contains(">Semaglutide improves knee osteoarthritis</a>"),
            "Link text should be complete. Got: {html}"
        );
        // No leftover URL tail such as ")00008-2)" may leak out of the link.
        assert!(
            !html.contains("00008-2)"),
            "Should not have broken URL fragments in text. Got: {html}"
        );
    }

    #[tokio::test]
    async fn test_title_with_brackets_renders_correctly() {
        let title = escape_link_text("Article about [Rust] programming");
        let djot = format!("##### [{title}](https://example.com/article)");

        let html = render_djot(&djot).await;

        // One link only: the brackets must not split it into several.
        let link_count = html.matches("<a ").count();
        assert_eq!(link_count, 1, "Should have exactly one link. Got: {html}");
        // The brackets survive as literal characters in the link text.
        assert!(
            html.contains("Article about [Rust] programming</a>"),
            "Link text should contain brackets. Got: {html}"
        );
    }

    #[tokio::test]
    async fn test_title_with_closing_bracket_renders_correctly() {
        // A lone `]` is the edge case that breaks the link when unescaped.
        let title = escape_link_text("Why ] matters in code");
        let djot = format!("##### [{title}](https://example.com/article)");

        let html = render_djot(&djot).await;

        // An anchor must open and close...
        let has_anchor = html.contains("<a ") && html.contains("</a>");
        assert!(has_anchor, "Should have a complete link. Got: {html}");
        // ...and point at the intended target.
        assert!(
            html.contains("href=\"https://example.com/article\""),
            "Link should have correct href. Got: {html}"
        );
    }

    #[tokio::test]
    async fn test_feed_title_with_special_chars() {
        let feed_title = escape_link_text("Blog [Tech] News");
        let feed_url = escape_url_for_djot("https://blog.example.com/(feed)");
        let djot = format!("Posted by Author from [{feed_title}]({feed_url})");

        let html = render_djot(&djot).await;

        // The feed link keeps its percent-encoded destination.
        assert!(
            html.contains("href=\"https://blog.example.com/%28feed%29\""),
            "Feed URL should have escaped parentheses. Got: {html}"
        );
    }

    #[tokio::test]
    async fn test_complex_url_with_multiple_special_chars() {
        // Wikipedia-style URLs routinely end in a parenthesised suffix.
        let link_target =
            escape_url_for_djot("https://en.wikipedia.org/wiki/Rust_(programming_language)");
        let djot = format!("[Rust]({link_target})");

        let html = render_djot(&djot).await;

        assert!(
            html.contains("href=\"https://en.wikipedia.org/wiki/Rust_%28programming_language%29\""),
            "Wikipedia-style URL should be escaped. Got: {html}"
        );
        assert!(
            html.contains(">Rust</a>"),
            "Link text should be intact. Got: {html}"
        );
    }

    #[tokio::test]
    async fn test_hn_format_renders_correctly() {
        // HN/Lobsters layout: linked title followed by a comments bullet.
        let title = escape_link_text("Show HN: My [new] project");
        let article_url = escape_url_for_djot("https://github.com/user/project");
        let source_url = escape_url_for_djot("https://news.ycombinator.com/item?id=12345");

        let djot = format!("##### [{title}]({article_url})\n\n* [💬 HN Comments]({source_url})\n");

        let html = render_djot(&djot).await;

        // Title link plus comments link.
        let link_count = html.matches("<a ").count();
        assert_eq!(link_count, 2, "Should have two links. Got: {html}");
        assert!(
            html.contains("💬 HN Comments</a>"),
            "Should have comments link text. Got: {html}"
        );
    }
}

0 commit comments

Comments
 (0)