diff --git a/Cargo.lock b/Cargo.lock index e17719ad..d28f5c36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1002,7 +1002,7 @@ dependencies = [ [[package]] name = "monolith" -version = "2.9.0" +version = "2.10.0" dependencies = [ "assert_cmd", "atty", diff --git a/Cargo.toml b/Cargo.toml index 5189927c..ce877ffb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "monolith" -version = "2.9.0" +version = "2.10.0" authors = [ "Sunshine ", "Mahdi Robatipoor ", @@ -57,3 +57,12 @@ assert_cmd = "2.0.16" default = ["cli", "vendored-openssl"] cli = ["clap", "tempfile"] # Build a CLI tool that includes main() function vendored-openssl = ["openssl/vendored"] # Compile and statically link a copy of OpenSSL + +[lib] +name = "monolith" +path = "src/lib.rs" + +[[bin]] +name = "monolith" +path = "src/main.rs" +required-features = ["cli"] diff --git a/README.md b/README.md index 4f5772da..d5f7e783 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,13 @@ ``` - _____ ______________ __________ ___________________ ___ -| \ / \ | | | | | | -| \_/ __ \_| __ | | ___ ___ |__| | -| | | | | | | | | | | | -| |\ /| |__| _ |__| |____| | | | | __ | -| | \___/ | | \ | | | | | | | -|___| |__________| \_____________________| |___| |___| |___| + _____ _____________ __________ ___________________ ___ +| \ / \ | | | | | | +| \/ __ \| __ | | ___ ___ |__| | +| | | | | | | | | | | | +| |\ /| |__| |__| |___| | | | | __ | +| | \__/ | |\ | | | | | | | +|___| |__________| \___________________| |___| |___| |___| ``` A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive. 
diff --git a/src/core.rs b/src/core.rs index bca4b00d..541ce41b 100644 --- a/src/core.rs +++ b/src/core.rs @@ -2,7 +2,7 @@ use std::env; use std::error::Error; use std::fmt; use std::fs; -use std::io::{self, prelude::*, Write}; +use std::io::{self, Write}; use std::path::{Path, PathBuf}; use std::time::Duration; @@ -59,7 +59,7 @@ pub enum MonolithOutputFormat { pub struct Options { pub base_url: Option, pub blacklist_domains: bool, - pub cookies: Vec, // TODO: move out of this struct + pub cookies: Vec, // TODO: move out of this struct? pub domains: Option>, pub encoding: Option, pub ignore_errors: bool, @@ -117,83 +117,7 @@ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[ "image/svg+xml", // .svg ]; -// TODO: split it into three separate functions [create_from_data(), create_from_url(), create() -- create is likely not public] -pub fn create_monolithic_document( - source: String, - options: &Options, - cache: &mut Option, -) -> Result<(Vec, Option), MonolithError> { - // Check if source was provided - if source.is_empty() { - return Err(MonolithError::new("no target specified")); - } - - // Check if custom encoding value is acceptable - if let Some(custom_encoding) = options.encoding.clone() { - if Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_none() { - return Err(MonolithError::new(&format!( - "unknown encoding \"{}\"", - &custom_encoding - ))); - } - } - - let mut use_stdin: bool = false; - - let target_url = match source.as_str() { - "-" => { - // Read from pipe (stdin) - use_stdin = true; - - // Set default target URL to an empty data URL - // (the user can change it by using the custom base URL option) - Url::parse("data:text/html,").unwrap() - } - target => match Url::parse(target) { - Ok(url) => match url.scheme() { - "data" | "file" | "http" | "https" => url, - unsupported_scheme => { - return Err(MonolithError::new(&format!( - "unsupported target URL scheme \"{}\"", - unsupported_scheme - ))); - } - }, - Err(_) => { - // Failed to parse 
given base URL (perhaps it's a filesystem path?) - let path: &Path = Path::new(&target); - match path.exists() { - true => match path.is_file() { - true => { - let canonical_path = fs::canonicalize(path).unwrap(); - match Url::from_file_path(canonical_path) { - Ok(url) => url, - Err(_) => { - return Err(MonolithError::new(&format!( - "could not generate file URL out of given path \"{}\"", - &target - ))); - } - } - } - false => { - return Err(MonolithError::new(&format!( - "local target \"{}\" is not a file", - &target - ))); - } - }, - false => { - // It is not a FS path, now we do what browsers do: - // prepend "http://" and hope it points to a website - Url::parse(&format!("http://{}", &target)).unwrap() - } - } - } - }, - }; - - // Initialize HTTP client +pub fn init_client(options: &Options) -> Client { let mut header_map = HeaderMap::new(); if let Some(user_agent) = &options.user_agent { header_map.insert( @@ -201,7 +125,7 @@ pub fn create_monolithic_document( HeaderValue::from_str(user_agent).expect("Invalid User-Agent header specified"), ); } - let client = Client::builder() + Client::builder() .timeout(Duration::from_secs(if options.timeout > 0 { options.timeout } else { @@ -212,85 +136,68 @@ pub fn create_monolithic_document( .danger_accept_invalid_certs(options.insecure) .default_headers(header_map) .build() - .expect("Failed to initialize HTTP client"); - - // At first we assume that base URL is same as target URL - let mut base_url: Url = target_url.clone(); - - let data: Vec; - let mut document_encoding: String = "".to_string(); - let mut dom: RcDom; + .expect("Failed to initialize HTTP client") +} - // Retrieve target document - if use_stdin { - data = read_stdin(); - } else if target_url.scheme() == "file" - || target_url.scheme() == "http" - || target_url.scheme() == "https" - || target_url.scheme() == "data" +pub fn create_monolithic_document_from_data( + input_data: Vec, + options: &Options, + cache: &mut Option, + input_encoding: Option, + 
input_target: Option, +) -> Result<(Vec, Option), MonolithError> { + // Validate options { - match retrieve_asset(cache, &client, &target_url, &target_url, options) { - Ok((retrieved_data, final_url, media_type, charset)) => { - // Provide output as text (without processing it, the way browsers do) - if !media_type.eq_ignore_ascii_case("text/html") - && !media_type.eq_ignore_ascii_case("application/xhtml+xml") - { - return Ok((retrieved_data, None)); - } - - if options - .base_url - .clone() - .unwrap_or("".to_string()) - .is_empty() - { - base_url = final_url; - } - - data = retrieved_data; - document_encoding = charset; - } - Err(_) => { - return Err(MonolithError::new("could not retrieve target document")); + // Check if custom encoding value is acceptable + if let Some(custom_output_encoding) = options.encoding.clone() { + if Encoding::for_label_no_replacement(custom_output_encoding.as_bytes()).is_none() { + return Err(MonolithError::new(&format!( + "unknown encoding \"{}\"", + &custom_output_encoding + ))); } } - } else { - return Err(MonolithError::new("unsupported target")); } - // Initial parse - dom = html_to_dom(&data, document_encoding.clone()); + let client: Client = init_client(options); + let mut base_url: Url = if input_target.is_some() { + Url::parse(&input_target.clone().unwrap()).unwrap() + } else { + Url::parse("data:text/html,").unwrap() + }; + let mut document_encoding: String = input_encoding.clone().unwrap_or("utf-8".to_string()); + let mut dom: RcDom; - // TODO: investigate if charset from filesystem/data URL/HTTP headers - // has say over what's specified in HTML + // Initial parse + dom = html_to_dom(&input_data, document_encoding.clone()); - // Attempt to determine document's charset + // Attempt to determine document's encoding if let Some(html_charset) = get_charset(&dom.document) { if !html_charset.is_empty() { // Check if the charset specified inside HTML is valid - if let Some(encoding) = 
Encoding::for_label_no_replacement(html_charset.as_bytes()) { + if let Some(document_charset) = + Encoding::for_label_no_replacement(html_charset.as_bytes()) + { document_encoding = html_charset; - dom = html_to_dom(&data, encoding.name().to_string()); + dom = html_to_dom(&input_data, document_charset.name().to_string()); } } } // Use custom base URL if specified; read and use what's in the DOM otherwise - let custom_base_url: String = options.base_url.clone().unwrap_or("".to_string()); + let custom_base_url: String = options.base_url.clone().unwrap_or_default(); if custom_base_url.is_empty() { - // No custom base URL is specified - // Try to see if document has BASE element + // No custom base URL is specified; try to see if document has BASE element if let Some(existing_base_url) = get_base_url(&dom.document) { - base_url = resolve_url(&target_url, &existing_base_url); + base_url = resolve_url(&base_url, &existing_base_url); } } else { // Custom base URL provided match Url::parse(&custom_base_url) { Ok(parsed_url) => { if parsed_url.scheme() == "file" { - // File base URLs can only work with - // documents saved from filesystem - if target_url.scheme() == "file" { + // File base URLs can only work with documents saved from filesystem + if base_url.scheme() == "file" { base_url = parsed_url; } } else { @@ -299,7 +206,7 @@ pub fn create_monolithic_document( } Err(_) => { // Failed to parse given base URL, perhaps it's a filesystem path? 
- if target_url.scheme() == "file" { + if base_url.scheme() == "file" { // Relative paths could work for documents saved from filesystem let path: &Path = Path::new(&custom_base_url); if path.exists() { @@ -330,12 +237,21 @@ pub fn create_monolithic_document( // Request and embed /favicon.ico (unless it's already linked in the document) if !options.no_images - && (target_url.scheme() == "http" || target_url.scheme() == "https") + && (base_url.scheme() == "http" || base_url.scheme() == "https") + && (input_target.is_some() + && (input_target.as_ref().unwrap().starts_with("http:") + || input_target.as_ref().unwrap().starts_with("https:"))) && !has_favicon(&dom.document) { let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico"); - match retrieve_asset(cache, &client, &target_url, &favicon_ico_url, options) { + match retrieve_asset( + cache, + &client, + /*&target_url, */ &base_url, + &favicon_ico_url, + options, + ) { Ok((data, final_url, media_type, charset)) => { let favicon_data_url: Url = create_data_url(&media_type, &charset, &data, &final_url); @@ -360,8 +276,10 @@ pub fn create_monolithic_document( let mut result: Vec = serialize_document(dom, document_encoding, options); // Prepend metadata comment tag - if !options.no_metadata { - let mut metadata_comment: String = create_metadata_tag(&target_url); + if !options.no_metadata && !input_target.clone().unwrap_or_default().is_empty() { + let mut metadata_comment: String = + create_metadata_tag(&Url::parse(&input_target.unwrap_or_default()).unwrap()); + // let mut metadata_comment: String = create_metadata_tag(target); metadata_comment += "\n"; result.splice(0..0, metadata_comment.as_bytes().to_vec()); } @@ -372,6 +290,120 @@ pub fn create_monolithic_document( } } +pub fn create_monolithic_document( + target: String, + options: &mut Options, + cache: &mut Option, +) -> Result<(Vec, Option), MonolithError> { + // Check if target was provided + if target.is_empty() { + return Err(MonolithError::new("no 
target specified")); + } + + // Validate options + { + // Check if custom encoding value is acceptable + if let Some(custom_encoding) = options.encoding.clone() { + if Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_none() { + return Err(MonolithError::new(&format!( + "unknown encoding \"{}\"", + &custom_encoding + ))); + } + } + } + + let target_url = match target.as_str() { + target_str => match Url::parse(target_str) { + Ok(target_url) => match target_url.scheme() { + "data" | "file" | "http" | "https" => target_url, + unsupported_scheme => { + return Err(MonolithError::new(&format!( + "unsupported target URL scheme \"{}\"", + unsupported_scheme + ))); + } + }, + Err(_) => { + // Failed to parse given base URL (perhaps it's a filesystem path?) + let path: &Path = Path::new(&target_str); + + match path.exists() { + true => match path.is_file() { + true => { + let canonical_path = fs::canonicalize(path).unwrap(); + + match Url::from_file_path(canonical_path) { + Ok(url) => url, + Err(_) => { + return Err(MonolithError::new(&format!( + "could not generate file URL out of given path \"{}\"", + &target_str + ))); + } + } + } + false => { + return Err(MonolithError::new(&format!( + "local target \"{}\" is not a file", + &target_str + ))); + } + }, + false => { + // It is not a FS path, now we do what browsers do: + // prepend "http://" and hope it points to a website + Url::parse(&format!("http://{}", &target_str)).unwrap() + } + } + } + }, + }; + + let client: Client = init_client(options); + let data: Vec; + let mut base_url: Url = target_url.clone(); + let document_encoding: Option; + + // Retrieve target document + if target_url.scheme() == "file" + || target_url.scheme() == "http" + || target_url.scheme() == "https" + || target_url.scheme() == "data" + { + match retrieve_asset(cache, &client, &target_url, &target_url, options) { + Ok((retrieved_data, final_url, media_type, charset)) => { + if !media_type.eq_ignore_ascii_case("text/html") + && 
!media_type.eq_ignore_ascii_case("application/xhtml+xml") + { + // Provide output as text (without processing it, the way browsers do) + return Ok((retrieved_data, None)); + } + + if options.base_url.clone().unwrap_or_default().is_empty() { + base_url = final_url; + } + + data = retrieved_data; + document_encoding = Some(charset); + } + Err(_) => { + return Err(MonolithError::new("could not retrieve target document")); + } + } + } else { + return Err(MonolithError::new("unsupported target")); + } + + create_monolithic_document_from_data( + data, + options, + cache, + document_encoding, + Some(target_url.to_string()), + ) +} + pub fn detect_media_type(data: &[u8], url: &Url) -> String { // At first attempt to read file's header for file_signature in FILE_SIGNATURES.iter() { @@ -670,15 +702,6 @@ pub fn retrieve_asset( } } -pub fn read_stdin() -> Vec { - let mut buffer: Vec = vec![]; - - match io::stdin().lock().read_to_end(&mut buffer) { - Ok(_) => buffer, - Err(_) => buffer, - } -} - pub fn print_error_message(text: &str, options: &Options) { if !options.silent { let stderr = io::stderr(); diff --git a/src/html.rs b/src/html.rs index ab47fa30..5cc17e6a 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,5 +1,5 @@ -use base64::prelude::*; -use chrono::prelude::*; +use base64::{prelude::BASE64_STANDARD, Engine}; +use chrono::{SecondsFormat, Utc}; use encoding_rs::Encoding; use html5ever::interface::{Attribute, QualName}; use html5ever::parse_document; @@ -34,11 +34,11 @@ pub enum LinkType { struct SrcSetItem<'a> { path: &'a str, - descriptor: &'a str, + descriptor: &'a str, // Width or pixel density descriptor } const FAVICON_VALUES: &[&str] = &["icon", "shortcut icon"]; -const WHITESPACES: &[char] = &['\t', '\n', '\x0c', '\r', ' ']; +const WHITESPACES: &[char] = &[' ', '\t', '\n', '\x0c', '\r']; // ASCII whitespaces pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom { let mut buf: Vec = Vec::new(); @@ -154,55 +154,16 @@ pub fn embed_srcset( srcset: 
&str, options: &Options, ) -> String { - let mut array: Vec = vec![]; - - // Parse srcset attribute according to the specs - // https://html.spec.whatwg.org/multipage/images.html#srcset-attribute - let mut offset = 0; - let size = srcset.chars().count(); - - while offset < size { - let mut has_descriptor = true; - // Zero or more whitespaces + skip leading comma - let url_start = offset - + srcset[offset..] - .chars() - .take_while(|&c| WHITESPACES.contains(&c) || c == ',') - .count(); - if url_start >= size { - break; - } - // A valid non-empty URL that does not start or end with comma - let mut url_end = url_start - + srcset[url_start..] - .chars() - .take_while(|&c| !WHITESPACES.contains(&c)) - .count(); - while (url_end - 1) > url_start && srcset.chars().nth(url_end - 1).unwrap() == ',' { - has_descriptor = false; - url_end -= 1; - } - offset = url_end; - // If the URL wasn't terminated by comma there may also be a descriptor - if has_descriptor { - offset += srcset[url_end..].chars().take_while(|&c| c != ',').count(); - } - // Collect SrcSetItem - if url_end > url_start { - let path = &srcset[url_start..url_end]; - let descriptor = &srcset[url_end..offset].trim(); - let srcset_real_item = SrcSetItem { path, descriptor }; - array.push(srcset_real_item); - } - } + let srcset_items: Vec = parse_srcset(srcset); + // Embed assets let mut result: String = "".to_string(); - let mut i: usize = array.len(); - for part in array { + let mut i: usize = srcset_items.len(); + for srcset_item in srcset_items { if options.no_images { result.push_str(EMPTY_IMAGE_DATA_URL); } else { - let image_full_url: Url = resolve_url(document_url, part.path); + let image_full_url: Url = resolve_url(document_url, srcset_item.path); match retrieve_asset(cache, client, document_url, &image_full_url, options) { Ok((image_data, image_final_url, image_media_type, image_charset)) => { let mut image_data_url = create_data_url( @@ -227,9 +188,9 @@ pub fn embed_srcset( } } - if 
!part.descriptor.is_empty() { + if !srcset_item.descriptor.is_empty() { result.push(' '); - result.push_str(part.descriptor); + result.push_str(srcset_item.descriptor); } if i > 1 { @@ -408,6 +369,74 @@ pub fn parse_link_type(link_attr_rel_value: &str) -> Vec { types } +pub fn parse_srcset(srcset: &str) -> Vec { + let mut srcset_items: Vec = vec![]; + + // Parse srcset + let mut partials: Vec<&str> = srcset.split(WHITESPACES).collect(); + let mut path: Option<&str> = None; + let mut descriptor: Option<&str> = None; + let mut i = 0; + while i < partials.len() { + let partial = partials[i]; + + // Skip empty strings + if partial.is_empty() { + i += 1; + continue; + } + + if partial.ends_with(',') { + if path.is_none() { + path = Some(partial.strip_suffix(',').unwrap()); + descriptor = Some("") + } else { + descriptor = Some(partial.strip_suffix(',').unwrap()); + } + } else if path.is_none() { + path = Some(partial); + } else { + let mut chunks: Vec<&str> = partial.split(',').collect(); + + if !chunks.is_empty() && chunks.first().unwrap().ends_with(['x', 'w']) { + descriptor = Some(chunks.first().unwrap()); + + chunks.remove(0); + } + + if !chunks.is_empty() { + if descriptor.is_some() { + partials.insert(0, &partial[descriptor.unwrap().len()..]); + } else { + partials.insert(0, partial); + } + } + } + + if path.is_some() && descriptor.is_some() { + srcset_items.push(SrcSetItem { + path: path.unwrap(), + descriptor: descriptor.unwrap(), + }); + + path = None; + descriptor = None; + } + + i += 1; + } + + // Final attempt to process what was found + if path.is_some() { + srcset_items.push(SrcSetItem { + path: path.unwrap(), + descriptor: descriptor.unwrap_or_default(), + }); + } + + srcset_items +} + pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom { let mut buf: Vec = Vec::new(); serialize( diff --git a/src/main.rs b/src/main.rs index 60cdaa68..6fb4f3ef 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,23 +1,25 @@ use std::fs; -use std::io::{self, Error 
as IoError, Write}; +use std::io::{self, Error as IoError, Read, Write}; use std::process; -use chrono::prelude::*; +use chrono::{SecondsFormat, Utc}; use clap::Parser; use tempfile::Builder; use monolith::cache::Cache; use monolith::cookies::parse_cookie_file_contents; -use monolith::core::{create_monolithic_document, print_error_message, Options}; +use monolith::core::{ + create_monolithic_document, create_monolithic_document_from_data, print_error_message, Options, +}; const ASCII: &str = " \ - _____ ______________ __________ ___________________ ___ -| \\ / \\ | | | | | | -| \\_/ __ \\_| __ | | ___ ___ |__| | -| | | | | | | | | | | | -| |\\ /| |__| _ |__| |____| | | | | __ | -| | \\___/ | | \\ | | | | | | | -|___| |__________| \\_____________________| |___| |___| |___| + _____ _____________ __________ ___________________ ___ +| \\ / \\ | | | | | | +| \\/ __ \\| __ | | ___ ___ |__| | +| | | | | | | | | | | | +| |\\ /| |__| |__| |___| | | | | __ | +| | \\__/ | |\\ | | | | | | | +|___| |__________| \\___________________| |___| |___| |___| "; #[derive(Parser)] @@ -167,10 +169,19 @@ impl Output { } const CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 10; // Minimum file size for on-disk caching (in bytes) -const DEFAULT_NETWORK_TIMEOUT: u64 = 120; +const DEFAULT_NETWORK_TIMEOUT: u64 = 120; // Maximum time to retrieve each remote asset (in seconds) const DEFAULT_USER_AGENT: &str = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0"; +pub fn read_stdin() -> Vec { + let mut buffer: Vec = vec![]; + + match io::stdin().lock().read_to_end(&mut buffer) { + Ok(_) => buffer, + Err(_) => buffer, + } +} + fn main() { let cli = Cli::parse(); let cookie_file_path; @@ -263,22 +274,46 @@ fn main() { } // Retrieve target from source and output result - match create_monolithic_document(cli.target, &options, &mut cache) { - Ok((result, title)) => { - // Define output - let mut output = Output::new( - &destination.unwrap_or(String::new()), - 
&title.unwrap_or_default(), - ) - .expect("could not prepare output"); + if cli.target == "-" { + // Read input from pipe (STDIN) + let data: Vec = read_stdin(); + + match create_monolithic_document_from_data(data, &options, &mut cache, None, None) { + Ok((result, title)) => { + // Define output + let mut output = Output::new( + &destination.unwrap_or(String::new()), + &title.unwrap_or_default(), + ) + .expect("could not prepare output"); + + // Write result into STDOUT or file + output.write(&result).expect("could not write output"); + } + Err(error) => { + print_error_message(&format!("Error: {}", error), &options); - // Write result into STDOUT or file - output.write(&result).expect("could not write output"); + exit_code = 1; + } } - Err(error) => { - print_error_message(&format!("Error: {}", error), &options); + } else { + match create_monolithic_document(cli.target, &mut options, &mut cache) { + Ok((result, title)) => { + // Define output + let mut output = Output::new( + &destination.unwrap_or(String::new()), + &title.unwrap_or_default(), + ) + .expect("could not prepare output"); + + // Write result into STDOUT or file + output.write(&result).expect("could not write output"); + } + Err(error) => { + print_error_message(&format!("Error: {}", error), &options); - exit_code = 1; + exit_code = 1; + } } } diff --git a/src/url.rs b/src/url.rs index 7c28ec5d..75f58a68 100644 --- a/src/url.rs +++ b/src/url.rs @@ -1,4 +1,4 @@ -use base64::prelude::*; +use base64::{prelude::BASE64_STANDARD, Engine}; use percent_encoding::percent_decode_str; use url::Url; diff --git a/tests/cli/base_url.rs b/tests/cli/base_url.rs index c5631661..44192425 100644 --- a/tests/cli/base_url.rs +++ b/tests/cli/base_url.rs @@ -17,7 +17,7 @@ mod passing { let out = cmd .arg("-M") .arg("-b") - .arg("http://localhost:8000/") + .arg("http://localhost:30701/") .arg("data:text/html,Hello%2C%20World!") .output() .unwrap(); @@ -28,8 +28,9 @@ mod passing { // STDOUT should contain newly added base URL 
assert_eq!( String::from_utf8_lossy(&out.stdout), - r#"Hello, World! -"# + "\ + \ + Hello, World!\n" ); // Exit code should be 0 @@ -41,7 +42,7 @@ mod passing { let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") - .arg(r#"data:text/html,Hello%2C%20World!"#) + .arg("data:text/html,Hello%2C%20World!") .output() .unwrap(); @@ -51,8 +52,9 @@ mod passing { // STDOUT should contain newly added base URL assert_eq!( String::from_utf8_lossy(&out.stdout), - r#"Hello, World! -"# + "\ + \ + Hello, World!\n" ); // Exit code should be 0 @@ -66,7 +68,7 @@ mod passing { .arg("-M") .arg("-b") .arg("http://localhost/") - .arg(r#"data:text/html,Hello%2C%20World!"#) + .arg("data:text/html,Hello%2C%20World!") .output() .unwrap(); @@ -76,8 +78,9 @@ mod passing { // STDOUT should contain newly added base URL assert_eq!( String::from_utf8_lossy(&out.stdout), - r#"Hello, World! -"# + "\ + \ + Hello, World!\n" ); // Exit code should be 0 @@ -91,7 +94,7 @@ mod passing { .arg("-M") .arg("-b") .arg("") - .arg(r#"data:text/html,Hello%2C%20World!"#) + .arg("data:text/html,Hello%2C%20World!") .output() .unwrap(); @@ -101,8 +104,9 @@ mod passing { // STDOUT should contain newly added base URL assert_eq!( String::from_utf8_lossy(&out.stdout), - r#"Hello, World! 
-"# + "\ + \ + Hello, World!\n" ); // Exit code should be 0 diff --git a/tests/html/embed_srcset.rs b/tests/html/embed_srcset.rs index 8f5b5bb8..48f025ee 100644 --- a/tests/html/embed_srcset.rs +++ b/tests/html/embed_srcset.rs @@ -34,8 +34,8 @@ mod passing { assert_eq!( embedded_css, format!( - "{} 1x, {} 1.5x, {} 2x", - EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, + "{dataurl} 1x, {dataurl} 1.5x, {dataurl} 2x", + dataurl = EMPTY_IMAGE_DATA_URL, ), ); } @@ -58,7 +58,7 @@ mod passing { assert_eq!( embedded_css, - format!("{}, {} 1.5x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL), + format!("{dataurl}, {dataurl} 1.5x", dataurl = EMPTY_IMAGE_DATA_URL), ); } @@ -80,7 +80,29 @@ mod passing { assert_eq!( embedded_css, - format!("{} 1x, {} 2x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL), + format!("{dataurl} 1x, {dataurl} 2x", dataurl = EMPTY_IMAGE_DATA_URL), + ); + } + + #[test] + fn narrow_whitespaces_within_file_names() { + let cache = &mut Some(Cache::new(0, None)); + let client = Client::new(); + let srcset_value = "small\u{202f}s.png 1x, large\u{202f}l.png 2x"; + let mut options = Options::default(); + options.no_images = true; + options.silent = true; + let embedded_css = html::embed_srcset( + cache, + &client, + &Url::parse("data:,").unwrap(), + srcset_value, + &options, + ); + + assert_eq!( + embedded_css, + format!("{dataurl} 1x, {dataurl} 2x", dataurl = EMPTY_IMAGE_DATA_URL), ); } @@ -88,7 +110,7 @@ mod passing { fn tabs_and_newlines_after_commas() { let cache = &mut Some(Cache::new(0, None)); let client = Client::new(); - let srcset_value = "small,s.png 1x,\nmedium,m.png 2x,\nlarge,l.png 3x"; + let srcset_value = "small-s.png 1x,\tmedium,m.png 2x,\nlarge-l.png 3x"; let mut options = Options::default(); options.no_images = true; options.silent = true; @@ -103,8 +125,8 @@ mod passing { assert_eq!( embedded_css, format!( - "{} 1x, {} 2x, {} 3x", - EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL + "{dataurl} 1x, {dataurl} 2x, 
{dataurl} 3x", + dataurl = EMPTY_IMAGE_DATA_URL ), ); } @@ -113,7 +135,7 @@ mod passing { fn no_whitespace_after_commas() { let cache = &mut Some(Cache::new(0, None)); let client = Client::new(); - let srcset_value = "small,s.png 1x,medium,m.png 2x,large,l.png 3x"; + let srcset_value = "small-s.png 1x,medium-m.png 2x,large-l.png 3x"; let mut options = Options::default(); options.no_images = true; options.silent = true; @@ -128,8 +150,8 @@ mod passing { assert_eq!( embedded_css, format!( - "{} 1x, {} 2x, {} 3x", - EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL + "{dataurl} 1x, {dataurl} 2x, {dataurl} 3x", + dataurl = EMPTY_IMAGE_DATA_URL ), ); } @@ -138,7 +160,7 @@ mod passing { fn last_without_descriptor() { let cache = &mut Some(Cache::new(0, None)); let client = Client::new(); - let srcset_value = "small,s.png 1x, medium,m.png 2x, large,l.png"; + let srcset_value = "small-s.png 400w, medium-m.png 800w, large-l.png"; let mut options = Options::default(); options.no_images = true; options.silent = true; @@ -153,8 +175,8 @@ mod passing { assert_eq!( embedded_css, format!( - "{} 1x, {} 2x, {}", - EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL + "{dataurl} 400w, {dataurl} 800w, {dataurl}", + dataurl = EMPTY_IMAGE_DATA_URL ), ); } @@ -195,7 +217,7 @@ mod failing { assert_eq!( embedded_css, - format!("{} 1x, {} 2x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL), + format!("{dataurl} 1x, {dataurl} 2x", dataurl = EMPTY_IMAGE_DATA_URL), ); } }