chore: test input sanitization

dpc · dpc · commit 72f21cea46bb · 2026-02-03T00:30:33.000-08:00
diff --git a/crates/rostra-web-ui/src/routes/content.rs b/crates/rostra-web-ui/src/routes/content.rs
@@ -13,7 +13,7 @@ use crate::UiState;
 
 mod filters;
 
-use filters::{PrismCodeBlocks, RostraMedia, RostraProfileLinks};
+use filters::{PrismCodeBlocks, RostraMedia, RostraProfileLinks, SanitizeUrls};
 
 /// Extension trait for adding rostra-specific rendering transformations
 pub trait RostraRenderExt {
@@ -46,24 +46,53 @@ pub trait RostraRenderExt {
     {
         PrismCodeBlocks::new(self)
     }
+
+    /// Wrap `self` in a `SanitizeUrls` filter (javascript:, vbscript:, data:)
+    fn sanitize_urls(self) -> SanitizeUrls<Self>
+    where
+        Self: Sized,
+    {
+        SanitizeUrls::new(self)
+    }
 }
 
 impl<'s, R> RostraRenderExt for R where R: Sized + AsyncRender<'s> {}
 
+/// Wrap `renderer` in the standard output filters: `SanitizeUrls` (innermost),
+/// then `PrismCodeBlocks`, then jotup's `AsyncSanitize` (outermost).
+///
+/// Production rendering and the sanitization tests both build their renderer
+/// through this function, so they share one filter chain. Callers add any
+/// content-specific filters to `renderer` before passing it in.
+///
+/// - Production: `make_base_renderer(..)` over
+///   `Renderer::default().rostra_profile_links(..).rostra_media(..)`
+/// - Tests: `make_base_renderer(Renderer::default())`
+pub(crate) fn make_base_renderer<'s, R>(
+    renderer: R,
+) -> jotup::html::filters::AsyncSanitize<PrismCodeBlocks<SanitizeUrls<R>>>
+where
+    R: AsyncRender<'s> + Send,
+    SanitizeUrls<R>: AsyncRender<'s> + Send,
+    PrismCodeBlocks<SanitizeUrls<R>>: AsyncRender<'s> + Send,
+{
+    renderer.sanitize_urls().prism_code_blocks().sanitize()
+}
+
 impl UiState {
     pub(crate) async fn render_content(
         &self,
         client: &ClientRef<'_>,
         author_id: RostraId,
         content: &str,
     ) -> Markup {
-        // Compose the filters using extension traits: Renderer -> ProfileLinks ->
-        // Media -> PrismCodeBlocks -> Sanitize
-        let renderer = jotup::html::tokio::Renderer::default()
-            .rostra_profile_links(client.clone())
-            .rostra_media(client.clone(), author_id)
-            .prism_code_blocks()
-            .sanitize();
+        // Compose filters: ProfileLinks -> Media -> (SanitizeUrls + Prism +
+        // Sanitize via make_base_renderer)
+        let renderer = make_base_renderer(
+            jotup::html::tokio::Renderer::default()
+                .rostra_profile_links(client.clone())
+                .rostra_media(client.clone(), author_id),
+        );
 
         let out = renderer
             .render_into_document(content)
diff --git a/crates/rostra-web-ui/src/routes/content/filters.rs b/crates/rostra-web-ui/src/routes/content/filters.rs
@@ -553,3 +553,81 @@ where
         self.inner.into_output()
     }
 }
+
+/// Filter that sanitizes dangerous URL protocols (javascript:, vbscript:,
+/// data:) in links and images. All data: URLs are blocked out of caution.
+pub(crate) struct SanitizeUrls<R> {
+    inner: R,
+}
+
+impl<R> SanitizeUrls<R> {
+    /// Wrap `inner` in the URL-sanitizing filter.
+    pub(crate) fn new(inner: R) -> Self {
+        Self { inner }
+    }
+
+    /// Check if a URL uses a dangerous protocol that could execute code.
+    /// Whitespace and control characters are dropped before matching, since
+    /// browsers ignore some of them around and inside the scheme.
+    fn is_dangerous_url(url: &str) -> bool {
+        let mut url = url.to_lowercase();
+        url.retain(|c| !c.is_whitespace() && !c.is_control());
+        ["javascript:", "vbscript:", "data:"].iter().any(|p| url.starts_with(*p))
+    }
+}
+
+#[async_trait::async_trait]
+impl<'s, R> AsyncRender<'s> for SanitizeUrls<R>
+where
+    R: AsyncRender<'s> + Send,
+{
+    type Error = R::Error;
+
+    /// Forward `event` to the inner renderer, neutralizing dangerous URLs.
+    ///
+    /// Only `Start` events for `Link` and `Image` containers are inspected.
+    /// When `is_dangerous_url` flags the destination it is replaced (`"#"`
+    /// for links, `""` for images) while the link type and attributes are
+    /// kept; every other event is forwarded unchanged. Errors from the inner
+    /// renderer are returned as-is.
+    async fn emit(&mut self, event: Event<'s>) -> Result<(), Self::Error> {
+        let sanitized = match event {
+            Event::Start(Container::Link(target, kind), attrs) => {
+                // A dangerous link target is pointed at "#" instead.
+                let target = if Self::is_dangerous_url(&target) {
+                    Cow::Borrowed("#")
+                } else {
+                    target
+                };
+                Event::Start(Container::Link(target, kind), attrs)
+            }
+            Event::Start(Container::Image(source, kind), attrs) => {
+                // A dangerous image source becomes an empty URL (a broken
+                // image is safer).
+                let source = if Self::is_dangerous_url(&source) {
+                    Cow::Borrowed("")
+                } else {
+                    source
+                };
+                Event::Start(Container::Image(source, kind), attrs)
+            }
+            // Anything else passes through untouched.
+            passthrough => passthrough,
+        };
+
+        // Single forwarding point for both rewritten and untouched events.
+        self.inner.emit(sanitized).await
+    }
+}
+
+#[async_trait::async_trait]
+impl<'s, R> AsyncRenderOutput<'s> for SanitizeUrls<R>
+where
+    R: AsyncRenderOutput<'s> + Send,
+{
+    type Output = R::Output;
+    /// Consume the filter and hand back the inner renderer's output as-is.
+    fn into_output(self) -> Self::Output {
+        self.inner.into_output()
+    }
+}
diff --git a/crates/rostra-web-ui/src/routes/content/tests.rs b/crates/rostra-web-ui/src/routes/content/tests.rs
@@ -3,9 +3,12 @@ use std::str::FromStr;
 use jotup::r#async::AsyncRenderOutputExt;
 use rostra_core::id::RostraId;
 
-use super::RostraRenderExt;
+use super::{RostraRenderExt, make_base_renderer};
 use crate::UiState;
 
+mod url_sanitization;
+mod xss_sanitization;
+
 #[test]
 fn extract_rostra_id_link() {
     assert_eq!(
@@ -49,7 +52,6 @@ async fn code_block_gets_prism_classes() {
 
     let html = render_with_prism(content).await;
 
-    // Should have language class on code element
     assert!(
         html.contains("language-rust"),
         "Missing language-rust class"
@@ -62,7 +64,6 @@ async fn code_block_unknown_language() {
 
     let html = render_with_prism(content).await;
 
-    // Should still render as code block
     assert!(html.contains("<code"), "Missing code element");
 }
 
@@ -72,7 +73,6 @@ async fn inline_code_not_affected_by_prism() {
 
     let html = render_with_prism(content).await;
 
-    // Inline code should not get language class
     assert!(
         !html.contains("language-"),
         "Inline code should not have language class"
@@ -97,16 +97,13 @@ fn djot_image_with_apostrophe_events() {
     let content = r#"![I'ts](https://www.youtube.com/watch?v=Z0GFRcFm-aY)"#;
     let events = render_events(content);
 
-    // The apostrophe in "I'ts" is parsed as a RightSingleQuote event between Str
-    // events
     assert!(
         events
             .iter()
             .any(|e| matches!(e, jotup::Event::RightSingleQuote)),
         "Expected RightSingleQuote event for the apostrophe"
     );
 
-    // Check the Str events contain "I" and "ts" separately
     let str_contents: Vec<_> = events
         .iter()
         .filter_map(|e| match e {
@@ -126,11 +123,9 @@ fn djot_image_with_apostrophe_events() {
 
 #[test]
 fn djot_image_with_multiple_smart_punctuation() {
-    // Test various smart punctuation in alt text
     let content = r#"![It's "great"...](https://example.com/img.png)"#;
     let events = render_events(content);
 
-    // Should have right single quote, double quotes, and ellipsis
     assert!(
         events
             .iter()
@@ -151,7 +146,6 @@ fn djot_image_with_multiple_smart_punctuation() {
 
 #[test]
 fn djot_image_with_softbreak_and_symbol() {
-    // Test that multi-line alt text generates Softbreak events
     let content = "![line1\nline2](https://example.com/img.png)";
     let events = render_events(content);
 
@@ -160,7 +154,6 @@ fn djot_image_with_softbreak_and_symbol() {
         "Expected Softbreak event for newline in alt text"
     );
 
-    // Test symbol syntax in alt text
     let content_sym = "![a :smile: emoji](https://example.com/img.png)";
     let events_sym = render_events(content_sym);
 
@@ -171,3 +164,15 @@ fn djot_image_with_softbreak_and_symbol() {
         "Expected Symbol event for :smile: in alt text"
     );
 }
+
+/// Render djot `content` to an HTML string through `make_base_renderer`,
+/// i.e. with the same sanitization chain production code uses, but without
+/// the profile-link and media filters.
+///
+/// Panics if rendering fails or the output is not valid UTF-8.
+pub(super) async fn render_sanitized(content: &str) -> String {
+    let renderer = make_base_renderer(jotup::html::tokio::Renderer::default());
+    let rendered = renderer.render_into_document(content).await;
+    let bytes = rendered.expect("Rendering failed").into_inner();
+    String::from_utf8(bytes).expect("valid utf8")
+}
diff --git a/crates/rostra-web-ui/src/routes/content/tests/url_sanitization.rs b/crates/rostra-web-ui/src/routes/content/tests/url_sanitization.rs
@@ -0,0 +1,70 @@
+//! Tests for URL protocol sanitization.
+//!
+//! These tests verify that dangerous URL protocols (javascript:, vbscript:,
+//! data:) are blocked in links and images.
+
+use super::render_sanitized;
+
+/// Verify that a `javascript:` destination in an inline djot link is
+/// sanitized: the rendered `href` must be "#" instead of the script URL.
+#[tokio::test]
+async fn javascript_url_in_link_is_sanitized() {
+    let content = r#"[click me](javascript:alert('xss'))"#;
+    let html = render_sanitized(content).await;
+    assert!(
+        !html.contains(r#"href="javascript:"#),
+        "javascript: URLs should be blocked in href attributes. Got: {html}"
+    );
+    // The dangerous destination must have been swapped for the "#" placeholder.
+    assert!(
+        html.contains(r##"href="#""##),
+        "dangerous URL should be replaced with #. Got: {html}"
+    );
+}
+
+/// Verify that a `javascript:` URL written as a djot autolink (`<...>`) does
+/// not reach the rendered `href`; only the scheme's absence is asserted.
+#[tokio::test]
+async fn autolink_javascript_is_sanitized() {
+    let content = "<javascript:alert('xss')>";
+    let html = render_sanitized(content).await;
+    assert!(
+        !html.contains(r#"href="javascript:"#),
+        "javascript: URLs should be blocked in autolinks. Got: {html}"
+    );
+}
+
+/// Verify that `vbscript:` link destinations are kept out of the `href`.
+#[tokio::test]
+async fn vbscript_url_is_sanitized() {
+    let content = r#"[click me](vbscript:alert('xss'))"#;
+    let html = render_sanitized(content).await;
+    assert!(
+        !html.contains(r#"href="vbscript:"#),
+        "vbscript: URLs should be blocked. Got: {html}"
+    );
+}
+
+/// Verify that `data:text/html` link destinations are blocked (XSS vector).
+#[tokio::test]
+async fn data_url_in_link_is_sanitized() {
+    let content = r#"[click me](data:text/html,<script>alert('xss')</script>)"#;
+    let html = render_sanitized(content).await;
+    assert!(
+        !html.contains(r#"href="data:"#),
+        "data: URLs should be blocked. Got: {html}"
+    );
+}
+
+/// Verify that ALL data: URLs are blocked, including seemingly safe ones
+/// such as base64 PNG images; the filter makes no per-media-type exception.
+#[tokio::test]
+async fn all_data_urls_are_blocked() {
+    // An inline image with a `data:image/png` source exercises the image path.
+    let content = r#"![img](data:image/png;base64,iVBORw0KGgo=)"#;
+    let html = render_sanitized(content).await;
+    assert!(
+        !html.contains("data:image"),
+        "All data: URLs should be blocked, including images. Got: {html}"
+    );
+}
diff --git a/crates/rostra-web-ui/src/routes/content/tests/xss_sanitization.rs b/crates/rostra-web-ui/src/routes/content/tests/xss_sanitization.rs