diff --git a/Cargo.lock b/Cargo.lock index de034ff..43dab1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,6 +19,29 @@ dependencies = [ "tracing", ] +[[package]] +name = "actix-files" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df8c4f30e3272d7c345f88ae0aac3848507ef5ba871f9cc2a41c8085a0f0523b" +dependencies = [ + "actix-http", + "actix-service", + "actix-utils", + "actix-web", + "bitflags 2.11.0", + "bytes", + "derive_more", + "futures-core", + "http-range", + "log", + "mime", + "mime_guess", + "percent-encoding", + "pin-project-lite", + "v_htmlescape", +] + [[package]] name = "actix-http" version = "3.12.0" @@ -898,6 +921,12 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-range" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21dec9db110f5f872ed9699c3ecf50cf16f423502706ba5c72462e28d3157573" + [[package]] name = "httparse" version = "1.10.1" @@ -1219,6 +1248,7 @@ dependencies = [ name = "mdwatch" version = "0.1.21" dependencies = [ + "actix-files", "actix-web", "actix-ws", "ammonia", @@ -1227,6 +1257,7 @@ dependencies = [ "notify", "notify-debouncer-full", "pulldown-cmark", + "regex", "rust-embed", "tokio", "webbrowser", @@ -1244,6 +1275,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -2151,6 +2192,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "v_htmlescape" +version = "0.15.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e8257fbc510f0a46eb602c10215901938b5c2a7d5e70fc11483b1d3c9b5b18c" + [[package]] name = "version_check" version = "0.9.5" diff --git a/Cargo.toml b/Cargo.toml index 70a59ac..91fcdc9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ documentation = "https://github.com/santoshxshrestha/mdwatch#readme" readme = "README.md" [dependencies] +actix-files = "0.6.10" actix-web = "4.11.0" actix-ws = "0.4.0" ammonia = "4.1.1" @@ -19,6 +20,7 @@ clap = { version = "4.5.46", features = ["derive"] } notify = "8.2.0" notify-debouncer-full = "0.7.0" pulldown-cmark = "0.13.0" +regex = "1.12.3" rust-embed = { version = "8.11.0", features = ["interpolate-folder-path"] } tokio = { version = "1.49.0", features = ["full"] } webbrowser = "1.0.5" diff --git a/src/main.rs b/src/main.rs index f56175a..904c244 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,21 +1,24 @@ use actix_web::web; +use ammonia::UrlRelative::PassThrough; use notify::event::RemoveKind; use notify_debouncer_full::DebouncedEvent; use notify_debouncer_full::{DebounceEventResult, new_debouncer, notify::*}; use pulldown_cmark::Options; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::time::{Duration, Instant}; use tokio::fs; mod args; +use actix_files::NamedFile; use actix_web::App; use actix_web::HttpServer; use actix_web::Responder; use actix_web::get; use actix_web::{HttpRequest, HttpResponse}; -use ammonia::clean; +use ammonia::Builder; use args::MdwatchArgs; use askama::Template; use clap::Parser; +use regex::Regex; use notify::{RecursiveMode, event::ModifyKind}; use rust_embed::Embed; @@ -104,6 +107,37 @@ async fn ws_handler( Ok(response) } +/// Rewrite local image `src` attributes to use the `/_local_image/` prefix. +/// Remote images (http://, https://, //, data:) are left untouched. 
+fn rewrite_image_paths(html: &str) -> String { + let re = Regex::new(r#"(<img[^>]*?src\s*=\s*")([^"]*?)(")"#).expect("invalid regex"); + re.replace_all(html, |caps: &regex::Captures| { + let prefix = &caps[1]; + let src = &caps[2]; + let suffix = &caps[3]; + // Skip remote URLs and data URIs + if src.starts_with("http://") + || src.starts_with("https://") + || src.starts_with("//") + || src.starts_with("data:") + { + format!("{}{}{}", prefix, src, suffix) + } else { + format!("{}/_local_image/{}{}", prefix, src, suffix) + } + }) + .to_string() +} + +/// Sanitize HTML while preserving relative URLs (needed for /_local_image/ paths). +fn sanitize_html(html: &str) -> String { + Builder::default() + .url_relative(PassThrough) + .add_generic_attributes(&["align"]) + .clean(html) + .to_string() +} + async fn get_markdown(file_path: &String) -> std::io::Result<String> { let markdown_input: String = fs::read_to_string(file_path).await?; let options = Options::all(); @@ -111,7 +145,8 @@ async fn get_markdown(file_path: &String) -> std::io::Result<String> { let mut html_output = String::new(); pulldown_cmark::html::push_html(&mut html_output, parser); - html_output = clean(&html_output); + html_output = rewrite_image_paths(&html_output); + html_output = sanitize_html(&html_output); Ok(html_output) } @@ -196,14 +231,55 @@ async fn home(file: web::Data) -> actix_web::Result { } } +/// Serve local image files referenced in the markdown. +/// Resolves the requested path relative to the markdown file's parent directory. +#[get("/_local_image/{path:.*}")] +async fn serve_local_image( + path: web::Path<String>, + base_dir: web::Data<PathBuf>, +) -> actix_web::Result<NamedFile> { + let requested = path.into_inner(); + let resolved = base_dir.join(&requested); + + // Canonicalize to prevent directory traversal attacks (e.g. 
../../etc/passwd) + let canonical = resolved + .canonicalize() + .map_err(|_| actix_web::error::ErrorNotFound("Image not found"))?; + + let base_canonical = base_dir + .canonicalize() + .map_err(|_| actix_web::error::ErrorInternalServerError("Invalid base directory"))?; + + if !canonical.starts_with(&base_canonical) { + return Err(actix_web::error::ErrorForbidden( + "Access denied: path outside base directory", + )); + } + + Ok(NamedFile::open(canonical)?) +} + #[actix_web::main] async fn main() -> std::io::Result<()> { let args = MdwatchArgs::parse(); let MdwatchArgs { file, ip, port } = args; + // Resolve the parent directory of the markdown file for serving local images + let file_path = Path::new(&file); + let base_dir: PathBuf = file_path + .parent() + .map(|p| { + if p.as_os_str().is_empty() { + PathBuf::from(".") + } else { + p.to_path_buf() + } + }) + .unwrap_or_else(|| PathBuf::from(".")); + if ip == "0.0.0.0" { - eprintln!(" Warning: Binding to 0.0.0.0 exposes your server to the entire network!"); + eprintln!(" Warning: Binding to 0.0.0.0 exposes your server to the entire network!"); eprintln!(" Make sure you trust your network or firewall settings."); } @@ -218,9 +294,52 @@ async fn main() -> std::io::Result<()> { App::new() .route("/ws", web::get().to(ws_handler)) .service(home) + .service(serve_local_image) .app_data(web::Data::new(file.clone())) + .app_data(web::Data::new(base_dir.clone())) }) .bind(format!("{}:{}", ip, port))? 
.run() .await } + +#[cfg(test)] +mod tests { + use super::*; + + struct TestCase { + input: &'static str, + expected: &'static str, + } + + #[test] + fn test_rewrite_image_paths() { + let test_cases = [ + TestCase { + input: r#"Image"#, + expected: r#"Image"#, + }, + TestCase { + input: r#"Remote Image"#, + expected: r#"Remote Image"#, + }, + TestCase { + input: r#"Data URI"#, + expected: r#"Data URI"#, + }, + TestCase { + input: r#"Protocol-relative URL"#, + expected: r#"Protocol-relative URL"#, + }, + ]; + + for case in test_cases { + let result = rewrite_image_paths(case.input); + assert_eq!( + result, case.expected, + "Failed to rewrite image paths for input: {}", + case.input + ); + } + } +}