@@ -9,6 +9,21 @@ use tracing::{info, warn};
99
1010use crate :: tables:: Article ;
1111
/// Percent-encode parentheses in a URL destined for a Djot/Markdown link.
///
/// Literal `(` and `)` conflict with the `[text](url)` syntax, so they are
/// rewritten to `%28` and `%29`. Every other character is copied through
/// unchanged.
fn escape_url_for_djot(url: &str) -> String {
    let mut escaped = String::with_capacity(url.len());
    for ch in url.chars() {
        match ch {
            '(' => escaped.push_str("%28"),
            ')' => escaped.push_str("%29"),
            other => escaped.push(other),
        }
    }
    escaped
}
17+
/// Escape text that will be used inside link brackets `[text]`.
///
/// Each `\`, `[` and `]` in `text` is prefixed with a single backslash so
/// that djot does not interpret the brackets as nested link syntax and an
/// existing backslash is not mistaken for the start of an escape sequence.
/// All other characters are copied through unchanged.
///
/// Returns a newly allocated string; the input is not modified.
fn escape_link_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        // The escape must be exactly backslash + character with nothing in
        // between: a backslash followed by a space is a different construct
        // in djot and would leave the bracket unescaped.
        if matches!(ch, '\\' | '[' | ']') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
26+
1227#[ derive( Debug , Snafu ) ]
1328pub enum PublisherError {
1429 #[ snafu( display( "Failed to post to Rostra: {source}" ) ) ]
@@ -41,7 +56,9 @@ impl Publisher {
4156 // Atom feed format: title on top, then "by {author} from {feed_title}
4257 // ({subtitle})"
4358 if let Some ( ref url) = article. url {
44- post. push_str ( & format ! ( "##### [{}]({})\n \n " , article. title, url) ) ;
59+ let escaped_title = escape_link_text ( & article. title ) ;
60+ let escaped_url = escape_url_for_djot ( url) ;
61+ post. push_str ( & format ! ( "##### [{escaped_title}]({escaped_url})\n \n " ) ) ;
4562 } else {
4663 post. push_str ( & format ! ( "##### {}\n \n " , article. title) ) ;
4764 }
@@ -57,7 +74,9 @@ impl Publisher {
5774 if let Some ( ref feed_title) = article. feed_title {
5875 post. push_str ( " from " ) ;
5976 if let Some ( ref feed_link) = article. feed_link {
60- post. push_str ( & format ! ( "[{feed_title}]({feed_link})" ) ) ;
77+ let escaped_feed_title = escape_link_text ( feed_title) ;
78+ let escaped_link = escape_url_for_djot ( feed_link) ;
79+ post. push_str ( & format ! ( "[{escaped_feed_title}]({escaped_link})" ) ) ;
6180 } else {
6281 post. push_str ( feed_title) ;
6382 }
@@ -69,7 +88,9 @@ impl Publisher {
6988 } else {
7089 // HN/Lobsters format: title with comments link
7190 if let Some ( ref url) = article. url {
72- post. push_str ( & format ! ( "##### [{}]({})\n \n " , article. title, url) ) ;
91+ let escaped_title = escape_link_text ( & article. title ) ;
92+ let escaped_url = escape_url_for_djot ( url) ;
93+ post. push_str ( & format ! ( "##### [{escaped_title}]({escaped_url})\n \n " ) ) ;
7394 } else {
7495 post. push_str ( & format ! ( "##### {}\n \n " , article. title) ) ;
7596 }
@@ -79,9 +100,9 @@ impl Publisher {
79100 "lobsters" => "Lobsters" ,
80101 _ => & article. source ,
81102 } ;
103+ let escaped_source_url = escape_url_for_djot ( & article. source_url ) ;
82104 post. push_str ( & format ! (
83- "* [💬 {} Comments]({})\n " ,
84- source_name, article. source_url
105+ "* [💬 {source_name} Comments]({escaped_source_url})\n "
85106 ) ) ;
86107 }
87108
@@ -144,3 +165,192 @@ impl Publisher {
144165 results
145166 }
146167}
168+
/// Unit and rendering tests for the djot link-escaping helpers.
///
/// The async tests feed escaped output through the `jotup` HTML renderer and
/// inspect the resulting markup, so they check the rendered link rather than
/// only the escaped string.
#[cfg(test)]
mod tests {
    use jotup::r#async::AsyncRenderOutputExt;

    use super::*;

    /// Helper to render djot content to HTML.
    async fn render_djot(content: &str) -> String {
        let out = jotup::html::tokio::Renderer::default()
            .render_into_document(content)
            .await
            .expect("Rendering failed");
        String::from_utf8(out.into_inner()).expect("valid utf8")
    }

    #[test]
    fn test_escape_url_for_djot() {
        assert_eq!(
            escape_url_for_djot("https://example.com/path"),
            "https://example.com/path"
        );
        assert_eq!(
            escape_url_for_djot("https://example.com/S1550-4131(26)00008-2"),
            "https://example.com/S1550-4131%2826%2900008-2"
        );
        assert_eq!(
            escape_url_for_djot("https://example.com/nested((parens))"),
            "https://example.com/nested%28%28parens%29%29"
        );
    }

    #[test]
    fn test_escape_link_text() {
        assert_eq!(escape_link_text("Normal title"), "Normal title");
        assert_eq!(
            escape_link_text("Title with [brackets]"),
            "Title with \\[brackets\\]"
        );
        assert_eq!(escape_link_text("Title with ] only"), "Title with \\] only");
        assert_eq!(escape_link_text("Title with [ only"), "Title with \\[ only");
        // A lone backslash must itself be doubled so it cannot start an escape.
        assert_eq!(
            escape_link_text("Title with \\ backslash"),
            "Title with \\\\ backslash"
        );
    }

    #[tokio::test]
    async fn test_url_with_parentheses_renders_correctly() {
        // Test the exact URL pattern that was causing issues
        let escaped_url = escape_url_for_djot(
            "https://www.cell.com/cell-metabolism/abstract/S1550-4131(26)00008-2",
        );
        let djot = format!("##### [Semaglutide improves knee osteoarthritis]({escaped_url})");

        let html = render_djot(&djot).await;

        // The link should be complete and correct
        assert!(
            html.contains(
                "href=\"https://www.cell.com/cell-metabolism/abstract/S1550-4131%2826%2900008-2\""
            ),
            "URL should have escaped parentheses. Got: {html}"
        );
        assert!(
            html.contains(">Semaglutide improves knee osteoarthritis</a>"),
            "Link text should be complete. Got: {html}"
        );
        // Should NOT have broken content like ")00008-2)" outside the link
        assert!(
            !html.contains("00008-2)"),
            "Should not have broken URL fragments in text. Got: {html}"
        );
    }

    #[tokio::test]
    async fn test_title_with_brackets_renders_correctly() {
        let escaped_title = escape_link_text("Article about [Rust] programming");
        let djot = format!("##### [{escaped_title}](https://example.com/article)");

        let html = render_djot(&djot).await;

        // Should be a single link, not broken into multiple
        assert_eq!(
            html.matches("<a ").count(),
            1,
            "Should have exactly one link. Got: {html}"
        );
        // The link should contain the full text with brackets preserved
        assert!(
            html.contains("Article about [Rust] programming</a>"),
            "Link text should contain brackets. Got: {html}"
        );
    }

    #[tokio::test]
    async fn test_title_with_closing_bracket_renders_correctly() {
        // This is the edge case that would break without escaping
        let escaped_title = escape_link_text("Why ] matters in code");
        let djot = format!("##### [{escaped_title}](https://example.com/article)");

        let html = render_djot(&djot).await;

        // Should have a proper link
        assert!(
            html.contains("<a ") && html.contains("</a>"),
            "Should have a complete link. Got: {html}"
        );
        assert!(
            html.contains("href=\"https://example.com/article\""),
            "Link should have correct href. Got: {html}"
        );
    }

    #[tokio::test]
    async fn test_feed_title_with_special_chars() {
        let escaped_feed_title = escape_link_text("Blog [Tech] News");
        let escaped_url = escape_url_for_djot("https://blog.example.com/(feed)");
        let djot = format!("Posted by Author from [{escaped_feed_title}]({escaped_url})");

        let html = render_djot(&djot).await;

        // Feed title link should work
        assert!(
            html.contains("href=\"https://blog.example.com/%28feed%29\""),
            "Feed URL should have escaped parentheses. Got: {html}"
        );
    }

    #[tokio::test]
    async fn test_complex_url_with_multiple_special_chars() {
        // Real-world URLs can have multiple parentheses and other chars
        let url = "https://en.wikipedia.org/wiki/Rust_(programming_language)";
        let escaped = escape_url_for_djot(url);
        let djot = format!("[Rust]({escaped})");

        let html = render_djot(&djot).await;

        assert!(
            html.contains("href=\"https://en.wikipedia.org/wiki/Rust_%28programming_language%29\""),
            "Wikipedia-style URL should be escaped. Got: {html}"
        );
        assert!(
            html.contains(">Rust</a>"),
            "Link text should be intact. Got: {html}"
        );
    }

    #[tokio::test]
    async fn test_hn_format_renders_correctly() {
        // Test the HN/Lobsters format with comments link
        let title = escape_link_text("Show HN: My [new] project");
        let article_url = escape_url_for_djot("https://github.com/user/project");
        let source_url = escape_url_for_djot("https://news.ycombinator.com/item?id=12345");

        // Heading, blank line, then a one-item list holding the comments link.
        let djot = format!("##### [{title}]({article_url})\n\n* [💬 HN Comments]({source_url})\n");

        let html = render_djot(&djot).await;

        // Should have two links
        assert_eq!(
            html.matches("<a ").count(),
            2,
            "Should have two links. Got: {html}"
        );
        assert!(
            html.contains("💬 HN Comments</a>"),
            "Should have comments link text. Got: {html}"
        );
    }
}
0 commit comments