Skip to content

Commit 72f21ce

Browse files
committed
chore: test input sanitization
1 parent 2b75c4f commit 72f21ce

File tree

5 files changed

+730
-19
lines changed

5 files changed

+730
-19
lines changed

crates/rostra-web-ui/src/routes/content.rs

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::UiState;
1313

1414
mod filters;
1515

16-
use filters::{PrismCodeBlocks, RostraMedia, RostraProfileLinks};
16+
use filters::{PrismCodeBlocks, RostraMedia, RostraProfileLinks, SanitizeUrls};
1717

1818
/// Extension trait for adding rostra-specific rendering transformations
1919
pub trait RostraRenderExt {
@@ -46,24 +46,53 @@ pub trait RostraRenderExt {
4646
{
4747
PrismCodeBlocks::new(self)
4848
}
49+
50+
/// Sanitize dangerous URL protocols (javascript:, vbscript:, data:)
51+
fn sanitize_urls(self) -> SanitizeUrls<Self>
52+
where
53+
Self: Sized,
54+
{
55+
SanitizeUrls::new(self)
56+
}
4957
}
5058

5159
impl<'s, R> RostraRenderExt for R where R: Sized + AsyncRender<'s> {}
5260

61+
/// Apply standard output filters (URL sanitization + syntax highlighting + XSS
62+
/// sanitization).
63+
///
64+
/// This is the final processing step for all content rendering. Takes an inner
65+
/// renderer and wraps it with URL sanitization, prism code blocks, and HTML
66+
/// sanitization.
67+
///
68+
/// - Production:
69+
/// `make_base_renderer(Renderer::default().profile_links().media())`
70+
/// - Tests: `make_base_renderer(Renderer::default())`
71+
pub(crate) fn make_base_renderer<'s, R>(
72+
renderer: R,
73+
) -> jotup::html::filters::AsyncSanitize<PrismCodeBlocks<SanitizeUrls<R>>>
74+
where
75+
R: AsyncRender<'s> + Send,
76+
SanitizeUrls<R>: AsyncRender<'s> + Send,
77+
PrismCodeBlocks<SanitizeUrls<R>>: AsyncRender<'s> + Send,
78+
{
79+
renderer.sanitize_urls().prism_code_blocks().sanitize()
80+
}
81+
5382
impl UiState {
5483
pub(crate) async fn render_content(
5584
&self,
5685
client: &ClientRef<'_>,
5786
author_id: RostraId,
5887
content: &str,
5988
) -> Markup {
60-
// Compose the filters using extension traits: Renderer -> ProfileLinks ->
61-
// Media -> PrismCodeBlocks -> Sanitize
62-
let renderer = jotup::html::tokio::Renderer::default()
63-
.rostra_profile_links(client.clone())
64-
.rostra_media(client.clone(), author_id)
65-
.prism_code_blocks()
66-
.sanitize();
89+
// Compose filters: ProfileLinks -> Media -> (SanitizeUrls + Prism + Sanitize
90+
// via make_base_renderer)
91+
let renderer = make_base_renderer(
92+
jotup::html::tokio::Renderer::default()
93+
.rostra_profile_links(client.clone())
94+
.rostra_media(client.clone(), author_id),
95+
);
6796

6897
let out = renderer
6998
.render_into_document(content)

crates/rostra-web-ui/src/routes/content/filters.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,3 +553,81 @@ where
553553
self.inner.into_output()
554554
}
555555
}
556+
557+
/// Filter that sanitizes dangerous URL protocols (`javascript:`, `vbscript:`,
/// `data:`) in links and images. All `data:` URLs are blocked out of caution.
pub(crate) struct SanitizeUrls<R> {
    /// Wrapped renderer that receives the (possibly rewritten) events.
    inner: R,
}

impl<R> SanitizeUrls<R> {
    /// Wrap `inner` so that its link and image URLs are checked first.
    pub(crate) fn new(inner: R) -> Self {
        Self { inner }
    }

    /// Check if a URL uses a dangerous protocol that could execute code.
    ///
    /// Returns `true` when, after normalization, `url` starts with
    /// `javascript:`, `vbscript:` or `data:` (ASCII case-insensitive).
    ///
    /// The normalization mirrors what browsers do before parsing a URL, so
    /// obfuscated spellings are caught as well:
    ///
    /// - leading/trailing whitespace and control characters are stripped;
    /// - ASCII tab, LF and CR are removed wherever they occur, so
    ///   `"java\tscript:"` is still recognized.
    fn is_dangerous_url(url: &str) -> bool {
        const DANGEROUS_SCHEMES: [&str; 3] = ["javascript:", "vbscript:", "data:"];

        // Only a scheme-sized prefix is needed; this avoids copying and
        // lowercasing a potentially huge URL (e.g. a base64 `data:` payload).
        let longest_scheme = DANGEROUS_SCHEMES
            .iter()
            .map(|scheme| scheme.len())
            .max()
            .unwrap_or(0);

        let prefix: String = url
            .trim_matches(|c: char| c.is_whitespace() || c.is_control())
            .chars()
            // Browsers drop these characters anywhere inside a URL.
            .filter(|c| !matches!(c, '\t' | '\n' | '\r'))
            .take(longest_scheme)
            // URL schemes are matched ASCII case-insensitively.
            .map(|c| c.to_ascii_lowercase())
            .collect();

        DANGEROUS_SCHEMES
            .iter()
            .any(|scheme| prefix.starts_with(*scheme))
    }
}
578+
579+
#[async_trait::async_trait]
580+
impl<'s, R> AsyncRender<'s> for SanitizeUrls<R>
581+
where
582+
R: AsyncRender<'s> + Send,
583+
{
584+
type Error = R::Error;
585+
586+
async fn emit(&mut self, event: Event<'s>) -> Result<(), Self::Error> {
587+
match event {
588+
Event::Start(Container::Link(url, link_type), attr) => {
589+
if Self::is_dangerous_url(&url) {
590+
// Replace dangerous URL with safe "#"
591+
self.inner
592+
.emit(Event::Start(
593+
Container::Link(Cow::Borrowed("#"), link_type),
594+
attr,
595+
))
596+
.await
597+
} else {
598+
self.inner
599+
.emit(Event::Start(Container::Link(url, link_type), attr))
600+
.await
601+
}
602+
}
603+
Event::Start(Container::Image(url, link_type), attr) => {
604+
if Self::is_dangerous_url(&url) {
605+
// Replace dangerous URL with empty string (broken image is safer)
606+
self.inner
607+
.emit(Event::Start(
608+
Container::Image(Cow::Borrowed(""), link_type),
609+
attr,
610+
))
611+
.await
612+
} else {
613+
self.inner
614+
.emit(Event::Start(Container::Image(url, link_type), attr))
615+
.await
616+
}
617+
}
618+
event => self.inner.emit(event).await,
619+
}
620+
}
621+
}
622+
623+
#[async_trait::async_trait]
624+
impl<'s, R> AsyncRenderOutput<'s> for SanitizeUrls<R>
625+
where
626+
R: AsyncRenderOutput<'s> + Send,
627+
{
628+
type Output = R::Output;
629+
630+
fn into_output(self) -> Self::Output {
631+
self.inner.into_output()
632+
}
633+
}

crates/rostra-web-ui/src/routes/content/tests.rs

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@ use std::str::FromStr;
33
use jotup::r#async::AsyncRenderOutputExt;
44
use rostra_core::id::RostraId;
55

6-
use super::RostraRenderExt;
6+
use super::{RostraRenderExt, make_base_renderer};
77
use crate::UiState;
88

9+
mod url_sanitization;
10+
mod xss_sanitization;
11+
912
#[test]
1013
fn extract_rostra_id_link() {
1114
assert_eq!(
@@ -49,7 +52,6 @@ async fn code_block_gets_prism_classes() {
4952

5053
let html = render_with_prism(content).await;
5154

52-
// Should have language class on code element
5355
assert!(
5456
html.contains("language-rust"),
5557
"Missing language-rust class"
@@ -62,7 +64,6 @@ async fn code_block_unknown_language() {
6264

6365
let html = render_with_prism(content).await;
6466

65-
// Should still render as code block
6667
assert!(html.contains("<code"), "Missing code element");
6768
}
6869

@@ -72,7 +73,6 @@ async fn inline_code_not_affected_by_prism() {
7273

7374
let html = render_with_prism(content).await;
7475

75-
// Inline code should not get language class
7676
assert!(
7777
!html.contains("language-"),
7878
"Inline code should not have language class"
@@ -97,16 +97,13 @@ fn djot_image_with_apostrophe_events() {
9797
let content = r#"![I'ts](https://www.youtube.com/watch?v=Z0GFRcFm-aY)"#;
9898
let events = render_events(content);
9999

100-
// The apostrophe in "I'ts" is parsed as a RightSingleQuote event between Str
101-
// events
102100
assert!(
103101
events
104102
.iter()
105103
.any(|e| matches!(e, jotup::Event::RightSingleQuote)),
106104
"Expected RightSingleQuote event for the apostrophe"
107105
);
108106

109-
// Check the Str events contain "I" and "ts" separately
110107
let str_contents: Vec<_> = events
111108
.iter()
112109
.filter_map(|e| match e {
@@ -126,11 +123,9 @@ fn djot_image_with_apostrophe_events() {
126123

127124
#[test]
128125
fn djot_image_with_multiple_smart_punctuation() {
129-
// Test various smart punctuation in alt text
130126
let content = r#"![It's "great"...](https://example.com/img.png)"#;
131127
let events = render_events(content);
132128

133-
// Should have right single quote, double quotes, and ellipsis
134129
assert!(
135130
events
136131
.iter()
@@ -151,7 +146,6 @@ fn djot_image_with_multiple_smart_punctuation() {
151146

152147
#[test]
153148
fn djot_image_with_softbreak_and_symbol() {
154-
// Test that multi-line alt text generates Softbreak events
155149
let content = "![line1\nline2](https://example.com/img.png)";
156150
let events = render_events(content);
157151

@@ -160,7 +154,6 @@ fn djot_image_with_softbreak_and_symbol() {
160154
"Expected Softbreak event for newline in alt text"
161155
);
162156

163-
// Test symbol syntax in alt text
164157
let content_sym = "![a :smile: emoji](https://example.com/img.png)";
165158
let events_sym = render_events(content_sym);
166159

@@ -171,3 +164,15 @@ fn djot_image_with_softbreak_and_symbol() {
171164
"Expected Symbol event for :smile: in alt text"
172165
);
173166
}
167+
168+
/// Helper to render djot content with full sanitization (like production).
169+
/// Uses the same sanitization chain as production code via
170+
/// `make_base_renderer`.
171+
pub(super) async fn render_sanitized(content: &str) -> String {
172+
let out = make_base_renderer(jotup::html::tokio::Renderer::default())
173+
.render_into_document(content)
174+
.await
175+
.expect("Rendering failed");
176+
177+
String::from_utf8(out.into_inner()).expect("valid utf8")
178+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
//! Tests for URL protocol sanitization.
2+
//!
3+
//! These tests verify that dangerous URL protocols (javascript:, vbscript:,
4+
//! data:) are blocked in links and images.
5+
6+
use super::render_sanitized;
7+
8+
/// Verify that javascript: URLs in djot links are sanitized.
9+
/// The SanitizeUrls filter replaces them with "#".
10+
#[tokio::test]
11+
async fn javascript_url_in_link_is_sanitized() {
12+
let content = r#"[click me](javascript:alert('xss'))"#;
13+
let html = render_sanitized(content).await;
14+
assert!(
15+
!html.contains(r#"href="javascript:"#),
16+
"javascript: URLs should be blocked in href attributes. Got: {html}"
17+
);
18+
// Should be replaced with #
19+
assert!(
20+
html.contains(r##"href="#""##),
21+
"dangerous URL should be replaced with #. Got: {html}"
22+
);
23+
}
24+
25+
/// Verify that javascript: URLs in djot autolinks are sanitized.
26+
/// The SanitizeUrls filter replaces them with "#".
27+
#[tokio::test]
28+
async fn autolink_javascript_is_sanitized() {
29+
let content = "<javascript:alert('xss')>";
30+
let html = render_sanitized(content).await;
31+
assert!(
32+
!html.contains(r#"href="javascript:"#),
33+
"javascript: URLs should be blocked in autolinks. Got: {html}"
34+
);
35+
}
36+
37+
/// Verify that vbscript: URLs are also sanitized.
38+
#[tokio::test]
39+
async fn vbscript_url_is_sanitized() {
40+
let content = r#"[click me](vbscript:alert('xss'))"#;
41+
let html = render_sanitized(content).await;
42+
assert!(
43+
!html.contains(r#"href="vbscript:"#),
44+
"vbscript: URLs should be blocked. Got: {html}"
45+
);
46+
}
47+
48+
/// Verify that data: URLs are blocked (can be used for XSS).
49+
#[tokio::test]
50+
async fn data_url_in_link_is_sanitized() {
51+
let content = r#"[click me](data:text/html,<script>alert('xss')</script>)"#;
52+
let html = render_sanitized(content).await;
53+
assert!(
54+
!html.contains(r#"href="data:"#),
55+
"data: URLs should be blocked. Got: {html}"
56+
);
57+
}
58+
59+
/// Verify that ALL data: URLs are blocked, including seemingly safe ones.
60+
/// We block all data: URLs out of caution.
61+
#[tokio::test]
62+
async fn all_data_urls_are_blocked() {
63+
// Even data:image URLs are blocked
64+
let content = r#"![img](data:image/png;base64,iVBORw0KGgo=)"#;
65+
let html = render_sanitized(content).await;
66+
assert!(
67+
!html.contains("data:image"),
68+
"All data: URLs should be blocked, including images. Got: {html}"
69+
);
70+
}

0 commit comments

Comments
 (0)