diff --git a/README.md b/README.md index 65cd8656..44597ecf 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ cat some-site-page.html | monolith -aIiFfcMv -b https://some.site/ - > some-site - `-M`: Don't add timestamp and URL information - `-n`: Extract contents of NOSCRIPT elements - `-o`: Write output to `file` (use “-” for STDOUT) - - `-s`: Be quiet + - `-q`: Be quiet - `-t`: Adjust `network request timeout` - `-u`: Provide `custom User-Agent` - `-v`: Exclude videos diff --git a/src/cache.rs b/src/cache.rs index 901e7963..ed17cd6d 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,4 +1,6 @@ use std::collections::HashMap; +use std::fs::File; +use std::io::{BufWriter, Write}; use std::path::Path; use redb::{Database, Error, TableDefinition}; @@ -15,8 +17,10 @@ pub struct Cache { metadata: HashMap, // Dictionary of metadata (and occasionally data [mostly for very small files]) db: Option, // Pointer to database instance; None if not yet initialized or if failed to initialize db_ok: Option, // None by default, Some(true) if was able to initialize database, Some (false) if an error occured + db_file_path: Option, // Filesystem path to file used for storing database } +const FILE_WRITE_BUF_LEN: usize = 1024 * 100; // On-disk cache file write buffer size (in bytes) const TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("_"); impl Cache { @@ -26,6 +30,7 @@ impl Cache { metadata: HashMap::new(), db: None, db_ok: None, + db_file_path: db_file_path.clone(), }; if db_file_path.is_some() { @@ -52,14 +57,14 @@ impl Cache { data: if self.db_ok.is_some() && self.db_ok.unwrap() { None } else { - Some((data.to_owned()).to_vec()) + Some(data.to_owned().to_vec()) }, media_type: Some(media_type.to_owned()), charset: Some(charset), }; if (self.db_ok.is_none() || !self.db_ok.unwrap()) || data.len() <= self.min_file_size { - cache_metadata_item.data = Some((data.to_owned()).to_vec()); + cache_metadata_item.data = Some(data.to_owned().to_vec()); } else { match self.db.as_ref().unwrap().begin_write() { Ok(write_txn) => { @@ -71,7 +76,7 @@ impl Cache { } Err(..) => { // Fall back to caching everything in memory - cache_metadata_item.data = Some((data.to_owned()).to_vec()); + cache_metadata_item.data = Some(data.to_owned().to_vec()); } } } @@ -110,4 +115,40 @@ impl Cache { pub fn contains_key(&self, key: &str) -> bool { self.metadata.contains_key(key) } + + pub fn destroy_database_file(&mut self) { + if self.db_ok.is_none() || !self.db_ok.unwrap() { + return; + } + + // Destroy database instance (prevents writes into file) + self.db = None; + self.db_ok = Some(false); + + // Wipe database file + if let Some(db_file_path) = self.db_file_path.to_owned() { + // Overwrite file with zeroes + if let Ok(temp_file) = File::options() + .read(true) + .write(true) + .open(db_file_path.clone()) + { + let mut buffer = [0; FILE_WRITE_BUF_LEN]; + let mut remaining_size: usize = temp_file.metadata().unwrap().len() as usize; + let mut writer = BufWriter::new(temp_file); + + while remaining_size > 0 { + let bytes_to_write: usize = if remaining_size < FILE_WRITE_BUF_LEN { + remaining_size + } else { + FILE_WRITE_BUF_LEN + }; + let buffer = &mut buffer[..bytes_to_write]; + writer.write(buffer).unwrap(); + + remaining_size -= bytes_to_write; + } + } + } + } } diff --git a/src/core.rs b/src/core.rs index 4bb1f57e..a6216897 100644 --- a/src/core.rs +++ b/src/core.rs @@ -1,7 +1,8 @@ +use std::env; use std::error::Error; use std::fmt; use std::fs; -use std::io::{self, prelude::*}; +use std::io::{self, prelude::*, Write}; use std::path::{Path, PathBuf}; use std::time::Duration; @@ -65,7 +66,6 @@ pub struct Options { pub insecure: bool, pub isolate: bool, pub no_audio: bool, - pub no_color: bool, pub no_css: bool, pub no_fonts: bool, pub no_frames: bool, @@ -659,24 +659,29 @@ pub fn read_stdin() -> Vec { } } -use std::io::Write; - pub fn print_error_message(text: &str, options: &Options) { if !options.silent { let stderr = io::stderr(); let mut handle = stderr.lock(); + const ENV_VAR_NO_COLOR: &str = "NO_COLOR"; + const ENV_VAR_TERM: &str = "TERM"; + + let mut no_color = + env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr); + if let Some(term) = env::var_os(ENV_VAR_TERM) { + if term == "dumb" { + no_color = true; + } + } + if handle .write_all( format!( "{}{}{}\n", - if options.no_color { "" } else { ANSI_COLOR_RED }, + if no_color { "" } else { ANSI_COLOR_RED }, &text, - if options.no_color { - "" - } else { - ANSI_COLOR_RESET - }, + if no_color { "" } else { ANSI_COLOR_RESET }, ) .as_bytes(), ) diff --git a/src/main.rs b/src/main.rs index 40d6deb8..0408baab 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,3 @@ -use std::env; use std::fs; use std::io::{self, Error as IoError, Write}; use std::process; @@ -55,16 +54,15 @@ const ASCII: &str = " \ | | \\___/ | | \\ | | | | | | | |___| |__________| \\_____________________| |___| |___| |___| "; -const CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 50; // Minimum file size for on-disk caching (in bytes) +const CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 10; // Minimum file size for on-disk caching (in bytes) const DEFAULT_NETWORK_TIMEOUT: u64 = 120; const DEFAULT_USER_AGENT: &str = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0"; -const ENV_VAR_NO_COLOR: &str = "NO_COLOR"; -const ENV_VAR_TERM: &str = "TERM"; fn main() { // Process CLI flags and options let mut cookie_file_path: Option = None; + let mut exit_code = 0; let mut options: Options = Options::default(); let source; let destination; @@ -79,7 +77,7 @@ fn main() { "-B, --blacklist-domains 'Treat list of specified domains as blacklist'", ) .args_from_usage("-c, --no-css 'Remove CSS'") - .args_from_usage("-C, --cookies=[cookies.txt] 'Specify cookie file'") + .args_from_usage("-C, --cookie-file=[cookies.txt] 'Specify cookie file'") .arg( Arg::with_name("domains") .short('d') @@ -104,7 +102,7 @@ fn main() { .args_from_usage( "-o, --output=[document.html] 'Write output to , use - for STDOUT'", ) - .args_from_usage("-s, --silent 'Suppress verbosity'") + .args_from_usage("-q, --quiet 'Suppress verbosity'") .args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'") .args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'") .args_from_usage("-v, --no-video 'Remove video sources'") @@ -128,7 +126,7 @@ fn main() { } options.blacklist_domains = app.is_present("blacklist-domains"); options.no_css = app.is_present("no-css"); - if let Some(cookie_file) = app.value_of("cookies") { + if let Some(cookie_file) = app.value_of("cookie-file") { cookie_file_path = Some(cookie_file.to_string()); } if let Some(encoding) = app.value_of("encoding") { @@ -147,7 +145,7 @@ fn main() { options.insecure = app.is_present("insecure"); options.no_metadata = app.is_present("no-metadata"); destination = app.value_of("output").unwrap_or("").to_string(); - options.silent = app.is_present("silent"); + options.silent = app.is_present("quiet"); options.timeout = app .value_of("timeout") .unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string()) @@ -160,14 +158,6 @@ fn main() { } options.unwrap_noscript = app.is_present("unwrap-noscript"); options.no_video = app.is_present("no-video"); - - options.no_color = - env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr); - if let Some(term) = env::var_os(ENV_VAR_TERM) { - if term == "dumb" { - options.no_color = true; - } - } } // Set up cache (attempt to create temporary file) @@ -175,7 +165,7 @@ fn main() { Ok(tempfile) => Some(tempfile), Err(_) => None, }; - let cache = Cache::new( + let mut cache = Some(Cache::new( CACHE_ASSET_FILE_SIZE_THRESHOLD, if temp_cache_file.is_some() { Some( @@ -189,7 +179,7 @@ fn main() { } else { None }, - ); + )); // Read and parse cookie file if let Some(opt_cookie_file) = cookie_file_path.clone() { @@ -222,7 +212,7 @@ fn main() { } } - match create_monolithic_document(source, &options, &mut Some(cache)) { + match create_monolithic_document(source, &options, &mut cache) { Ok(result) => { // Define output let mut output = Output::new(&destination).expect("could not prepare output"); @@ -233,7 +223,14 @@ fn main() { Err(error) => { print_error_message(&format!("Error: {}", error), &options); - process::exit(1); + exit_code = 1; } } + + // Clean up (shred database file) + cache.unwrap().destroy_database_file(); + + if exit_code > 0 { + process::exit(exit_code); + } }