From a384cc1f770c0d2c1245f92613c05ec26db43b43 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Sun, 9 Mar 2025 20:43:59 -0100 Subject: [PATCH 1/7] improve error handling --- src/core.rs | 64 ++++++++++++---------------------- src/main.rs | 11 ++++-- tests/cli/basic.rs | 20 ++++++++++- tests/cli/unusual_encodings.rs | 2 +- tests/core/options.rs | 4 --- 5 files changed, 50 insertions(+), 51 deletions(-) diff --git a/src/core.rs b/src/core.rs index cd80f67e..5dd8af9d 100644 --- a/src/core.rs +++ b/src/core.rs @@ -48,8 +48,7 @@ impl Error for MonolithError { pub struct Options { pub base_url: Option, pub blacklist_domains: bool, - // pub cache: Option, - pub cookies: Vec, + pub cookies: Vec, // TODO: move out of this struct pub domains: Option>, pub encoding: Option, pub ignore_errors: bool, @@ -66,7 +65,6 @@ pub struct Options { pub no_video: bool, pub output: String, pub silent: bool, - pub target: String, pub timeout: u64, pub unwrap_noscript: bool, pub user_agent: Option, @@ -104,30 +102,28 @@ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[ ]; pub fn create_monolithic_document( + target: String, options: &Options, - mut cache: &mut Cache, + mut cache: &mut Cache, // TODO: make it Option-al ) -> Result, MonolithError> { // Check if target was provided - if options.target.len() == 0 { - if !options.silent { - eprintln!("No target specified"); - } - + if target.len() == 0 { return Err(MonolithError::new("no target specified")); } // Check if custom encoding value is acceptable if let Some(custom_encoding) = options.encoding.clone() { if !Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_some() { - eprintln!("Unknown encoding: {}", &custom_encoding); - - return Err(MonolithError::new("unknown encoding specified")); + return Err(MonolithError::new(&format!( + "unknown encoding \"{}\"", + &custom_encoding + ))); } } let mut use_stdin: bool = false; - let target_url = match options.target.as_str() { + let target_url = match target.as_str() { "-" => { // Read from pipe (stdin) use_stdin = true; @@ -138,11 +134,10 @@ pub fn create_monolithic_document( Ok(url) => match url.scheme() { "data" | "file" | "http" | "https" => url, unsupported_scheme => { - if !options.silent { - eprintln!("Unsupported target URL type: {}", unsupported_scheme); - } - - return Err(MonolithError::new("unsupported target URL type")); + return Err(MonolithError::new(&format!( + "unsupported target URL scheme \"{}\"", + unsupported_scheme + ))); } }, Err(_) => { @@ -155,13 +150,10 @@ pub fn create_monolithic_document( match Url::from_file_path(canonical_path) { Ok(url) => url, Err(_) => { - if !options.silent { - eprintln!( - "Could not generate file URL out of given path: {}", - &target - ); - } - + // eprintln!( + // "Could not generate file URL out of given path: {}", + // &target + // ); return Err(MonolithError::new( "could not generate file URL out of given path", )); @@ -169,10 +161,7 @@ pub fn create_monolithic_document( } } false => { - if !options.silent { - eprintln!("Local target is not a file: {}", &target); - } - + // eprintln!("Local target is not a file: {}", &target); return Err(MonolithError::new("local target is not a file")); } }, @@ -245,10 +234,6 @@ pub fn create_monolithic_document( document_encoding = charset; } Err(_) => { - if !options.silent { - eprintln!("Could not retrieve target document"); - } - return Err(MonolithError::new("could not retrieve target document")); } } @@ -306,15 +291,10 @@ pub fn create_monolithic_document( base_url = file_url; } Err(_) => { - if !options.silent { - eprintln!( - "Could not map given path to base URL: {}", - custom_base_url - ); - } - return Err(MonolithError::new( - "could not map given path to base URL", - )); + return Err(MonolithError::new(&format!( + "could not map given path to base URL \"{}\"", + custom_base_url + ))); } } } diff --git a/src/main.rs b/src/main.rs index 74e1e754..1cac02a1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -66,6 +66,7 @@ fn main() { // Process CLI flags and options let mut cookie_file_path: Option = None; let mut options: Options = Options::default(); + let target; { let app = App::new(env!("CARGO_PKG_NAME")) .version(env!("CARGO_PKG_VERSION")) @@ -116,7 +117,7 @@ fn main() { .get_matches(); // Process the command - options.target = app + target = app .value_of("target") .expect("please set target") .to_string(); @@ -208,7 +209,7 @@ fn main() { } } - match create_monolithic_document(&options, &mut cache) { + match create_monolithic_document(target, &options, &mut cache) { Ok(result) => { // Define output let mut output = Output::new(&options.output).expect("Could not prepare output"); @@ -216,7 +217,11 @@ fn main() { // Write result into STDOUT or file output.write(&result).expect("Could not write output"); } - Err(_) => { + Err(error) => { + if !options.silent { + eprintln!("Error: {}", error); + } + process::exit(1); } } diff --git a/tests/cli/basic.rs b/tests/cli/basic.rs index 7c6d5971..e5d38a05 100644 --- a/tests/cli/basic.rs +++ b/tests/cli/basic.rs @@ -132,7 +132,25 @@ mod failing { // STDERR should contain error description assert_eq!( String::from_utf8_lossy(&out.stderr), - "No target specified\n" + "Error: no target specified\n" + ); + + // STDOUT should be empty + assert_eq!(String::from_utf8_lossy(&out.stdout), ""); + + // Exit code should be 1 + out.assert().code(1); + } + + #[test] + fn unsupported_scheme() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + let out = cmd.arg("mailto:snshn@tutanota.com").output().unwrap(); + + // STDERR should contain error description + assert_eq!( + String::from_utf8_lossy(&out.stderr), + "Error: unsupported target URL scheme \"mailto\"\n" ); // STDOUT should be empty diff --git a/tests/cli/unusual_encodings.rs b/tests/cli/unusual_encodings.rs index 2b3e5bdf..aa487ee8 100644 --- a/tests/cli/unusual_encodings.rs +++ b/tests/cli/unusual_encodings.rs @@ -170,7 +170,7 @@ mod passing { // STDERR should contain error message assert_eq!( String::from_utf8_lossy(&out.stderr), - "Unknown encoding: utf0\n" + "Error: unknown encoding \"utf0\"\n" ); // STDOUT should be empty diff --git a/tests/core/options.rs b/tests/core/options.rs index 54dc2f17..68580595 100644 --- a/tests/core/options.rs +++ b/tests/core/options.rs @@ -16,8 +16,6 @@ mod passing { assert_eq!(options.no_audio, false); assert_eq!(options.base_url, None); assert_eq!(options.no_css, false); - // assert_eq!(options.cache, None); - // assert_eq!(options.cookies, None); assert_eq!(options.encoding, None); assert_eq!(options.no_frames, false); assert_eq!(options.no_fonts, false); @@ -31,7 +29,5 @@ mod passing { assert_eq!(options.timeout, 0); assert_eq!(options.user_agent, None); assert_eq!(options.no_video, false); - - assert_eq!(options.target, "".to_string()); } } From c406ff404e88a608e99e4f103842b172ccd4b2c6 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Sun, 9 Mar 2025 20:47:50 -0100 Subject: [PATCH 2/7] switch to Rust 2024 --- Cargo.toml | 2 +- src/core.rs | 2 +- src/css.rs | 6 +++--- src/html.rs | 20 ++++++++++---------- src/main.rs | 2 +- tests/cli/local_files.rs | 2 +- tests/core/retrieve_asset.rs | 8 ++++---- tests/css/embed_css.rs | 2 +- tests/html/add_favicon.rs | 2 +- tests/html/compose_csp.rs | 5 ++++- tests/html/embed_srcset.rs | 4 ++-- tests/html/get_node_attr.rs | 2 +- tests/html/get_node_name.rs | 2 +- tests/html/serialize_document.rs | 18 +++--------------- tests/html/set_node_attr.rs | 4 ++-- tests/html/walk_and_embed_assets.rs | 2 +- 16 files changed, 37 insertions(+), 46 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f38a645a..8f364b08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ authors = [ "rhysd ", "Andriy Rakhnin ", ] -edition = "2021" +edition = "2024" description = "CLI tool for saving web pages as a single HTML file" homepage = "https://github.com/Y2Z/monolith" repository = "https://github.com/Y2Z/monolith" diff --git a/src/core.rs b/src/core.rs index 5dd8af9d..773f05e3 100644 --- a/src/core.rs +++ b/src/core.rs @@ -8,7 +8,7 @@ use std::time::Duration; use encoding_rs::Encoding; use markup5ever_rcdom::RcDom; use reqwest::blocking::Client; -use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT}; +use reqwest::header::{CONTENT_TYPE, COOKIE, HeaderMap, HeaderValue, REFERER, USER_AGENT}; use url::Url; use crate::cache::Cache; diff --git a/src/css.rs b/src/css.rs index 9915976c..2cab28a0 100644 --- a/src/css.rs +++ b/src/css.rs @@ -1,12 +1,12 @@ use cssparser::{ - serialize_identifier, serialize_string, ParseError, Parser, ParserInput, SourcePosition, Token, + ParseError, Parser, ParserInput, SourcePosition, Token, serialize_identifier, serialize_string, }; use reqwest::blocking::Client; use url::Url; use crate::cache::Cache; -use crate::core::{retrieve_asset, Options}; -use crate::url::{create_data_url, resolve_url, EMPTY_IMAGE_DATA_URL}; +use crate::core::{Options, retrieve_asset}; +use crate::url::{EMPTY_IMAGE_DATA_URL, create_data_url, resolve_url}; const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ // Universal diff --git a/src/html.rs b/src/html.rs index 41a7fa1e..1946d143 100644 --- a/src/html.rs +++ b/src/html.rs @@ -3,23 +3,23 @@ use chrono::prelude::*; use encoding_rs::Encoding; use html5ever::interface::QualName; use html5ever::parse_document; -use html5ever::serialize::{serialize, SerializeOpts}; -use html5ever::tendril::{format_tendril, TendrilSink}; +use html5ever::serialize::{SerializeOpts, serialize}; +use html5ever::tendril::{TendrilSink, format_tendril}; use html5ever::tree_builder::{Attribute, TreeSink}; -use html5ever::{local_name, namespace_url, ns, LocalName}; +use html5ever::{LocalName, local_name, namespace_url, ns}; use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle}; use regex::Regex; -use reqwest::blocking::Client; use reqwest::Url; +use reqwest::blocking::Client; use sha2::{Digest, Sha256, Sha384, Sha512}; use std::default::Default; use crate::cache::Cache; -use crate::core::{parse_content_type, retrieve_asset, Options}; +use crate::core::{Options, parse_content_type, retrieve_asset}; use crate::css::embed_css; use crate::js::attr_is_event_handler; use crate::url::{ - clean_url, create_data_url, is_url_and_has_protocol, resolve_url, EMPTY_IMAGE_DATA_URL, + EMPTY_IMAGE_DATA_URL, clean_url, create_data_url, is_url_and_has_protocol, resolve_url, }; #[derive(PartialEq, Eq)] @@ -139,7 +139,7 @@ pub fn create_metadata_tag(url: &Url) -> String { format!( "", if clean_url.scheme() == "http" || clean_url.scheme() == "https" { - &clean_url.as_str() + clean_url.as_str() } else { "local source" }, @@ -357,7 +357,7 @@ pub fn get_child_node_by_name(parent: &Handle, node_name: &str) -> Option Option { match &node.data { - NodeData::Element { ref attrs, .. } => { + NodeData::Element { attrs, .. } => { for attr in attrs.borrow().iter() { if &*attr.name.local == attr_name { return Some(attr.value.to_string()); @@ -371,7 +371,7 @@ pub fn get_node_attr(node: &Handle, attr_name: &str) -> Option { pub fn get_node_name(node: &Handle) -> Option<&'_ str> { match &node.data { - NodeData::Element { ref name, .. } => Some(name.local.as_ref()), + NodeData::Element { name, .. } => Some(name.local.as_ref()), _ => None, } } @@ -534,7 +534,7 @@ pub fn set_charset(mut dom: RcDom, desired_charset: String) -> RcDom { pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option) { match &node.data { - NodeData::Element { ref attrs, .. } => { + NodeData::Element { attrs, .. } => { let attrs_mut = &mut attrs.borrow_mut(); let mut i = 0; let mut found_existing_attr: bool = false; diff --git a/src/main.rs b/src/main.rs index 1cac02a1..71db7843 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,7 @@ use tempfile::Builder; use monolith::cache::Cache; use monolith::cookies::parse_cookie_file_contents; -use monolith::core::{create_monolithic_document, Options}; +use monolith::core::{Options, create_monolithic_document}; enum Output { Stdout(io::Stdout), diff --git a/tests/cli/local_files.rs b/tests/cli/local_files.rs index 20871c78..3ae388c2 100644 --- a/tests/cli/local_files.rs +++ b/tests/cli/local_files.rs @@ -10,7 +10,7 @@ mod passing { use assert_cmd::prelude::*; use std::env; use std::fs; - use std::path::{Path, MAIN_SEPARATOR}; + use std::path::{MAIN_SEPARATOR, Path}; use std::process::Command; use url::Url; diff --git a/tests/core/retrieve_asset.rs b/tests/core/retrieve_asset.rs index 428c4b62..337391bb 100644 --- a/tests/core/retrieve_asset.rs +++ b/tests/core/retrieve_asset.rs @@ -7,12 +7,12 @@ #[cfg(test)] mod passing { - use reqwest::blocking::Client; use reqwest::Url; + use reqwest::blocking::Client; use std::env; use monolith::cache::Cache; - use monolith::core::{retrieve_asset, Options}; + use monolith::core::{Options, retrieve_asset}; use monolith::url; #[test] @@ -99,11 +99,11 @@ mod passing { #[cfg(test)] mod failing { - use reqwest::blocking::Client; use reqwest::Url; + use reqwest::blocking::Client; use monolith::cache::Cache; - use monolith::core::{retrieve_asset, Options}; + use monolith::core::{Options, retrieve_asset}; #[test] fn read_local_file_with_data_url_parent() { diff --git a/tests/css/embed_css.rs b/tests/css/embed_css.rs index a43d5a5d..a4df7be6 100644 --- a/tests/css/embed_css.rs +++ b/tests/css/embed_css.rs @@ -7,8 +7,8 @@ #[cfg(test)] mod passing { - use reqwest::blocking::Client; use reqwest::Url; + use reqwest::blocking::Client; use monolith::cache::Cache; use monolith::core::Options; diff --git a/tests/html/add_favicon.rs b/tests/html/add_favicon.rs index 02ab8657..7b2fde44 100644 --- a/tests/html/add_favicon.rs +++ b/tests/html/add_favicon.rs @@ -7,7 +7,7 @@ #[cfg(test)] mod passing { - use html5ever::serialize::{serialize, SerializeOpts}; + use html5ever::serialize::{SerializeOpts, serialize}; use markup5ever_rcdom::SerializableHandle; use monolith::html; diff --git a/tests/html/compose_csp.rs b/tests/html/compose_csp.rs index 531d7cf7..fffa408c 100644 --- a/tests/html/compose_csp.rs +++ b/tests/html/compose_csp.rs @@ -78,6 +78,9 @@ mod passing { options.no_images = true; let csp_content = html::compose_csp(&options); - assert_eq!(csp_content, "default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;"); + assert_eq!( + csp_content, + "default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;" + ); } } diff --git a/tests/html/embed_srcset.rs b/tests/html/embed_srcset.rs index ec0e9f12..512fc7d3 100644 --- a/tests/html/embed_srcset.rs +++ b/tests/html/embed_srcset.rs @@ -7,8 +7,8 @@ #[cfg(test)] mod passing { - use reqwest::blocking::Client; use reqwest::Url; + use reqwest::blocking::Client; use monolith::cache::Cache; use monolith::core::Options; @@ -169,8 +169,8 @@ mod passing { #[cfg(test)] mod failing { - use reqwest::blocking::Client; use reqwest::Url; + use reqwest::blocking::Client; use monolith::cache::Cache; use monolith::core::Options; diff --git a/tests/html/get_node_attr.rs b/tests/html/get_node_attr.rs index e0dcd9f7..ef30f7b6 100644 --- a/tests/html/get_node_attr.rs +++ b/tests/html/get_node_attr.rs @@ -27,7 +27,7 @@ mod passing { test_walk(child, &mut *i); } } - NodeData::Element { ref name, .. } => { + NodeData::Element { name, .. } => { let node_name = name.local.as_ref().to_string(); if node_name == "body" { diff --git a/tests/html/get_node_name.rs b/tests/html/get_node_name.rs index ed79bec5..f8d2a809 100644 --- a/tests/html/get_node_name.rs +++ b/tests/html/get_node_name.rs @@ -26,7 +26,7 @@ mod passing { test_walk(child, &mut *i); } } - NodeData::Element { ref name, .. } => { + NodeData::Element { name, .. } => { let node_name = name.local.as_ref().to_string(); let parent = html::get_parent_node(node); let parent_node_name = html::get_node_name(&parent); diff --git a/tests/html/serialize_document.rs b/tests/html/serialize_document.rs index bd139137..285e4d26 100644 --- a/tests/html/serialize_document.rs +++ b/tests/html/serialize_document.rs @@ -33,11 +33,7 @@ mod passing { options.isolate = true; assert_eq!( - String::from_utf8_lossy(&html::serialize_document( - dom, - "".to_string(), - &options - )), + String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), "\ \ \ @@ -89,11 +85,7 @@ mod passing { options.no_frames = true; assert_eq!( - String::from_utf8_lossy(&html::serialize_document( - dom, - "".to_string(), - &options - )), + String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), "\ \ \ @@ -127,11 +119,7 @@ mod passing { options.no_images = true; assert_eq!( - String::from_utf8_lossy(&html::serialize_document( - dom, - "".to_string(), - &options - )), + String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), "\ \ \ diff --git a/tests/html/set_node_attr.rs b/tests/html/set_node_attr.rs index 114de70b..98edd52f 100644 --- a/tests/html/set_node_attr.rs +++ b/tests/html/set_node_attr.rs @@ -27,7 +27,7 @@ mod passing { test_walk(child, &mut *i); } } - NodeData::Element { ref name, .. } => { + NodeData::Element { name, .. } => { let node_name = name.local.as_ref().to_string(); if node_name == "html" { @@ -80,7 +80,7 @@ mod passing { test_walk(child, &mut *i); } } - NodeData::Element { ref name, .. } => { + NodeData::Element { name, .. } => { let node_name = name.local.as_ref().to_string(); if node_name == "body" { diff --git a/tests/html/walk_and_embed_assets.rs b/tests/html/walk_and_embed_assets.rs index 16b92537..84ff710f 100644 --- a/tests/html/walk_and_embed_assets.rs +++ b/tests/html/walk_and_embed_assets.rs @@ -7,7 +7,7 @@ #[cfg(test)] mod passing { - use html5ever::serialize::{serialize, SerializeOpts}; + use html5ever::serialize::{SerializeOpts, serialize}; use markup5ever_rcdom::SerializableHandle; use reqwest::blocking::Client; use url::Url; From 37ebd03d03e7568eff94d49b1bc6af40389e7171 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Sun, 9 Mar 2025 21:01:48 -0100 Subject: [PATCH 3/7] remove output from options, improve error messages --- src/core.rs | 29 +++++++++++++---------------- src/main.rs | 17 +++++++++-------- tests/core/options.rs | 1 - 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/src/core.rs b/src/core.rs index 773f05e3..0c9785a9 100644 --- a/src/core.rs +++ b/src/core.rs @@ -63,7 +63,6 @@ pub struct Options { pub no_js: bool, pub no_metadata: bool, pub no_video: bool, - pub output: String, pub silent: bool, pub timeout: u64, pub unwrap_noscript: bool, @@ -102,12 +101,12 @@ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[ ]; pub fn create_monolithic_document( - target: String, + source: String, options: &Options, mut cache: &mut Cache, // TODO: make it Option-al ) -> Result, MonolithError> { - // Check if target was provided - if target.len() == 0 { + // Check if source was provided + if source.len() == 0 { return Err(MonolithError::new("no target specified")); } @@ -123,7 +122,7 @@ pub fn create_monolithic_document( let mut use_stdin: bool = false; - let target_url = match target.as_str() { + let target_url = match source.as_str() { "-" => { // Read from pipe (stdin) use_stdin = true; @@ -150,26 +149,24 @@ pub fn create_monolithic_document( match Url::from_file_path(canonical_path) { Ok(url) => url, Err(_) => { - // eprintln!( - // "Could not generate file URL out of given path: {}", - // &target - // ); - return Err(MonolithError::new( - "could not generate file URL out of given path", - )); + return Err(MonolithError::new(&format!( + "could not generate file URL out of given path \"{}\"", + &target + ))); } } } false => { - // eprintln!("Local target is not a file: {}", &target); - return Err(MonolithError::new("local target is not a file")); + return Err(MonolithError::new(&format!( + "local target \"{}\" is not a file", + &target + ))); } }, false => { // It is not a FS path, now we do what browsers do: // prepend "http://" and hope it points to a website - Url::parse(&format!("http://{hopefully_url}", hopefully_url = &target)) - .unwrap() + Url::parse(&format!("http://{}", &target)).unwrap() } } } diff --git a/src/main.rs b/src/main.rs index 71db7843..b7e58bf7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,11 +16,11 @@ enum Output { } impl Output { - fn new(file_path: &str) -> Result { - if file_path.is_empty() || file_path.eq("-") { + fn new(destination: &str) -> Result { + if destination.is_empty() || destination.eq("-") { Ok(Output::Stdout(io::stdout())) } else { - Ok(Output::File(fs::File::create(file_path)?)) + Ok(Output::File(fs::File::create(destination)?)) } } @@ -66,7 +66,8 @@ fn main() { // Process CLI flags and options let mut cookie_file_path: Option = None; let mut options: Options = Options::default(); - let target; + let source; + let destination; { let app = App::new(env!("CARGO_PKG_NAME")) .version(env!("CARGO_PKG_VERSION")) @@ -117,7 +118,7 @@ fn main() { .get_matches(); // Process the command - target = app + source = app .value_of("target") .expect("please set target") .to_string(); @@ -145,7 +146,7 @@ fn main() { options.no_js = app.is_present("no-js"); options.insecure = app.is_present("insecure"); options.no_metadata = app.is_present("no-metadata"); - options.output = app.value_of("output").unwrap_or("").to_string(); + destination = app.value_of("output").unwrap_or("").to_string(); options.silent = app.is_present("silent"); options.timeout = app .value_of("timeout") @@ -209,10 +210,10 @@ fn main() { } } - match create_monolithic_document(target, &options, &mut cache) { + match create_monolithic_document(source, &options, &mut cache) { Ok(result) => { // Define output - let mut output = Output::new(&options.output).expect("Could not prepare output"); + let mut output = Output::new(&destination).expect("Could not prepare output"); // Write result into STDOUT or file output.write(&result).expect("Could not write output"); diff --git a/tests/core/options.rs b/tests/core/options.rs index 68580595..5366106c 100644 --- a/tests/core/options.rs +++ b/tests/core/options.rs @@ -24,7 +24,6 @@ mod passing { assert_eq!(options.no_js, false); assert_eq!(options.insecure, false); assert_eq!(options.no_metadata, false); - assert_eq!(options.output, "".to_string()); assert_eq!(options.silent, false); assert_eq!(options.timeout, 0); assert_eq!(options.user_agent, None); From db5d55c030ea1f2736ded9dab50fd44e324c1528 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Sun, 9 Mar 2025 21:16:13 -0100 Subject: [PATCH 4/7] use rustup for installing Rust on NetBSD --- .github/workflows/ci-netbsd.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-netbsd.yml b/.github/workflows/ci-netbsd.yml index dc399306..af3caf1f 100644 --- a/.github/workflows/ci-netbsd.yml +++ b/.github/workflows/ci-netbsd.yml @@ -28,7 +28,9 @@ jobs: with: usesh: true prepare: | - /usr/sbin/pkg_add rust mktools gmake pkgconf cwrappers + /usr/sbin/pkg_add curl cwrappers gmake mktools pkgconf + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y run: | + export PATH="/root/.cargo/bin:$PATH" cargo build --all --locked --verbose --no-default-features --features cli cargo test --all --locked --verbose --no-default-features --features cli From 824663688b53c429be69fc05cbd54c2227ca3251 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Sun, 9 Mar 2025 22:12:33 -0100 Subject: [PATCH 5/7] roll back Rust edition back to 2021 (the world isn't ready yet) --- .github/workflows/ci-netbsd.yml | 4 +--- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-netbsd.yml b/.github/workflows/ci-netbsd.yml index af3caf1f..e2a4e53c 100644 --- a/.github/workflows/ci-netbsd.yml +++ b/.github/workflows/ci-netbsd.yml @@ -28,9 +28,7 @@ jobs: with: usesh: true prepare: | - /usr/sbin/pkg_add curl cwrappers gmake mktools pkgconf - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + /usr/sbin/pkg_add cwrappers gmake mktools pkgconf rust run: | - export PATH="/root/.cargo/bin:$PATH" cargo build --all --locked --verbose --no-default-features --features cli cargo test --all --locked --verbose --no-default-features --features cli diff --git a/Cargo.toml b/Cargo.toml index 8f364b08..f38a645a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ authors = [ "rhysd ", "Andriy Rakhnin ", ] -edition = "2024" +edition = "2021" description = "CLI tool for saving web pages as a single HTML file" homepage = "https://github.com/Y2Z/monolith" repository = "https://github.com/Y2Z/monolith" From c9d7d87ec50dcf0249523d813938fac77857f7ed Mon Sep 17 00:00:00 2001 From: Sunshine Date: Sun, 9 Mar 2025 22:12:51 -0100 Subject: [PATCH 6/7] update user agent to newer version of Firefox --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index b7e58bf7..ad8f6f07 100644 --- a/src/main.rs +++ b/src/main.rs @@ -58,7 +58,7 @@ const ASCII: &'static str = " \ const CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 50; // Minimum file size for on-disk caching (in bytes) const DEFAULT_NETWORK_TIMEOUT: u64 = 120; const DEFAULT_USER_AGENT: &'static str = - "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0"; + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0"; const ENV_VAR_NO_COLOR: &str = "NO_COLOR"; const ENV_VAR_TERM: &str = "TERM"; From 4b747e41e38bc294b766804e833ade53b0016020 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Sun, 9 Mar 2025 22:17:54 -0100 Subject: [PATCH 7/7] re-format code back to Rust 2021 --- src/core.rs | 2 +- src/css.rs | 6 +++--- src/html.rs | 12 ++++++------ src/main.rs | 2 +- tests/cli/local_files.rs | 2 +- tests/core/retrieve_asset.rs | 8 ++++---- tests/css/embed_css.rs | 2 +- tests/html/add_favicon.rs | 2 +- tests/html/embed_srcset.rs | 4 ++-- tests/html/walk_and_embed_assets.rs | 2 +- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/core.rs b/src/core.rs index 0c9785a9..49bcecae 100644 --- a/src/core.rs +++ b/src/core.rs @@ -8,7 +8,7 @@ use std::time::Duration; use encoding_rs::Encoding; use markup5ever_rcdom::RcDom; use reqwest::blocking::Client; -use reqwest::header::{CONTENT_TYPE, COOKIE, HeaderMap, HeaderValue, REFERER, USER_AGENT}; +use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT}; use url::Url; use crate::cache::Cache; diff --git a/src/css.rs b/src/css.rs index 2cab28a0..9915976c 100644 --- a/src/css.rs +++ b/src/css.rs @@ -1,12 +1,12 @@ use cssparser::{ - ParseError, Parser, ParserInput, SourcePosition, Token, serialize_identifier, serialize_string, + serialize_identifier, serialize_string, ParseError, Parser, ParserInput, SourcePosition, Token, }; use reqwest::blocking::Client; use url::Url; use crate::cache::Cache; -use crate::core::{Options, retrieve_asset}; -use crate::url::{EMPTY_IMAGE_DATA_URL, create_data_url, resolve_url}; +use crate::core::{retrieve_asset, Options}; +use crate::url::{create_data_url, resolve_url, EMPTY_IMAGE_DATA_URL}; const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ // Universal diff --git a/src/html.rs b/src/html.rs index 1946d143..1f350fc1 100644 --- a/src/html.rs +++ b/src/html.rs @@ -3,23 +3,23 @@ use chrono::prelude::*; use encoding_rs::Encoding; use html5ever::interface::QualName; use html5ever::parse_document; -use html5ever::serialize::{SerializeOpts, serialize}; -use html5ever::tendril::{TendrilSink, format_tendril}; +use html5ever::serialize::{serialize, SerializeOpts}; +use html5ever::tendril::{format_tendril, TendrilSink}; use html5ever::tree_builder::{Attribute, TreeSink}; -use html5ever::{LocalName, local_name, namespace_url, ns}; +use html5ever::{local_name, namespace_url, ns, LocalName}; use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle}; use regex::Regex; -use reqwest::Url; use reqwest::blocking::Client; +use reqwest::Url; use sha2::{Digest, Sha256, Sha384, Sha512}; use std::default::Default; use crate::cache::Cache; -use crate::core::{Options, parse_content_type, retrieve_asset}; +use crate::core::{parse_content_type, retrieve_asset, Options}; use crate::css::embed_css; use crate::js::attr_is_event_handler; use crate::url::{ - EMPTY_IMAGE_DATA_URL, clean_url, create_data_url, is_url_and_has_protocol, resolve_url, + clean_url, create_data_url, is_url_and_has_protocol, resolve_url, EMPTY_IMAGE_DATA_URL, }; #[derive(PartialEq, Eq)] diff --git a/src/main.rs b/src/main.rs index ad8f6f07..529a34e4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,7 @@ use tempfile::Builder; use monolith::cache::Cache; use monolith::cookies::parse_cookie_file_contents; -use monolith::core::{Options, create_monolithic_document}; +use monolith::core::{create_monolithic_document, Options}; enum Output { Stdout(io::Stdout), diff --git a/tests/cli/local_files.rs b/tests/cli/local_files.rs index 3ae388c2..20871c78 100644 --- a/tests/cli/local_files.rs +++ b/tests/cli/local_files.rs @@ -10,7 +10,7 @@ mod passing { use assert_cmd::prelude::*; use std::env; use std::fs; - use std::path::{MAIN_SEPARATOR, Path}; + use std::path::{Path, MAIN_SEPARATOR}; use std::process::Command; use url::Url; diff --git a/tests/core/retrieve_asset.rs b/tests/core/retrieve_asset.rs index 337391bb..428c4b62 100644 --- a/tests/core/retrieve_asset.rs +++ b/tests/core/retrieve_asset.rs @@ -7,12 +7,12 @@ #[cfg(test)] mod passing { - use reqwest::Url; use reqwest::blocking::Client; + use reqwest::Url; use std::env; use monolith::cache::Cache; - use monolith::core::{Options, retrieve_asset}; + use monolith::core::{retrieve_asset, Options}; use monolith::url; #[test] @@ -99,11 +99,11 @@ mod passing { #[cfg(test)] mod failing { - use reqwest::Url; use reqwest::blocking::Client; + use reqwest::Url; use monolith::cache::Cache; - use monolith::core::{Options, retrieve_asset}; + use monolith::core::{retrieve_asset, Options}; #[test] fn read_local_file_with_data_url_parent() { diff --git a/tests/css/embed_css.rs b/tests/css/embed_css.rs index a4df7be6..a43d5a5d 100644 --- a/tests/css/embed_css.rs +++ b/tests/css/embed_css.rs @@ -7,8 +7,8 @@ #[cfg(test)] mod passing { - use reqwest::Url; use reqwest::blocking::Client; + use reqwest::Url; use monolith::cache::Cache; use monolith::core::Options; diff --git a/tests/html/add_favicon.rs b/tests/html/add_favicon.rs index 7b2fde44..02ab8657 100644 --- a/tests/html/add_favicon.rs +++ b/tests/html/add_favicon.rs @@ -7,7 +7,7 @@ #[cfg(test)] mod passing { - use html5ever::serialize::{SerializeOpts, serialize}; + use html5ever::serialize::{serialize, SerializeOpts}; use markup5ever_rcdom::SerializableHandle; use monolith::html; diff --git a/tests/html/embed_srcset.rs b/tests/html/embed_srcset.rs index 512fc7d3..ec0e9f12 100644 --- a/tests/html/embed_srcset.rs +++ b/tests/html/embed_srcset.rs @@ -7,8 +7,8 @@ #[cfg(test)] mod passing { - use reqwest::Url; use reqwest::blocking::Client; + use reqwest::Url; use monolith::cache::Cache; use monolith::core::Options; @@ -169,8 +169,8 @@ mod passing { #[cfg(test)] mod failing { - use reqwest::Url; use reqwest::blocking::Client; + use reqwest::Url; use monolith::cache::Cache; use monolith::core::Options; diff --git a/tests/html/walk_and_embed_assets.rs b/tests/html/walk_and_embed_assets.rs index 84ff710f..16b92537 100644 --- a/tests/html/walk_and_embed_assets.rs +++ b/tests/html/walk_and_embed_assets.rs @@ -7,7 +7,7 @@ #[cfg(test)] mod passing { - use html5ever::serialize::{SerializeOpts, serialize}; + use html5ever::serialize::{serialize, SerializeOpts}; use markup5ever_rcdom::SerializableHandle; use reqwest::blocking::Client; use url::Url;