Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ cat some-site-page.html | monolith -aIiFfcMv -b https://some.site/ - > some-site
- `-M`: Don't add timestamp and URL information
- `-n`: Extract contents of NOSCRIPT elements
- `-o`: Write output to `file` (use “-” for STDOUT)
- `-s`: Be quiet
- `-q`: Be quiet
- `-t`: Adjust `network request timeout`
- `-u`: Provide `custom User-Agent`
- `-v`: Exclude videos
Expand Down
47 changes: 44 additions & 3 deletions src/cache.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;

use redb::{Database, Error, TableDefinition};
Expand All @@ -15,8 +17,10 @@ pub struct Cache {
metadata: HashMap<String, CacheMetadataItem>, // Dictionary of metadata (and occasionally data [mostly for very small files])
db: Option<Database>, // Pointer to database instance; None if not yet initialized or if failed to initialize
db_ok: Option<bool>, // None by default, Some(true) if was able to initialize database, Some(false) if an error occurred
db_file_path: Option<String>, // Filesystem path to file used for storing database
}

const FILE_WRITE_BUF_LEN: usize = 1024 * 100; // On-disk cache file write buffer size (in bytes)
const TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("_");

impl Cache {
Expand All @@ -26,6 +30,7 @@ impl Cache {
metadata: HashMap::new(),
db: None,
db_ok: None,
db_file_path: db_file_path.clone(),
};

if db_file_path.is_some() {
Expand All @@ -52,14 +57,14 @@ impl Cache {
data: if self.db_ok.is_some() && self.db_ok.unwrap() {
None
} else {
Some((data.to_owned()).to_vec())
Some(data.to_owned().to_vec())
},
media_type: Some(media_type.to_owned()),
charset: Some(charset),
};

if (self.db_ok.is_none() || !self.db_ok.unwrap()) || data.len() <= self.min_file_size {
cache_metadata_item.data = Some((data.to_owned()).to_vec());
cache_metadata_item.data = Some(data.to_owned().to_vec());
} else {
match self.db.as_ref().unwrap().begin_write() {
Ok(write_txn) => {
Expand All @@ -71,7 +76,7 @@ impl Cache {
}
Err(..) => {
// Fall back to caching everything in memory
cache_metadata_item.data = Some((data.to_owned()).to_vec());
cache_metadata_item.data = Some(data.to_owned().to_vec());
}
}
}
Expand Down Expand Up @@ -110,4 +115,40 @@ impl Cache {
pub fn contains_key(&self, key: &str) -> bool {
self.metadata.contains_key(key)
}

/// Shuts down the on-disk cache and overwrites its backing file with zeroes.
///
/// Does nothing unless the database was successfully initialized
/// (`db_ok == Some(true)`). Otherwise the database handle is dropped and
/// `db_ok` is set to `Some(false)`, so later cache writes fall back to memory,
/// and the file at `db_file_path` (if any) is zero-filled over its current length.
///
/// Wiping is best-effort: I/O errors (file missing, not writable, disk error)
/// are ignored rather than turned into a panic, since this is cleanup code.
pub fn destroy_database_file(&mut self) {
    /// Overwrites the full current length of the file at `path` with zero bytes.
    fn overwrite_with_zeroes(path: &str) -> std::io::Result<()> {
        let file = File::options().read(true).write(true).open(path)?;

        // Keep the size as u64 (what `Metadata::len` returns) so it is never
        // narrowed on targets where `usize` is smaller than 64 bits
        let mut remaining_size: u64 = file.metadata()?.len();
        let buffer = [0u8; FILE_WRITE_BUF_LEN];
        let mut writer = BufWriter::new(file);

        while remaining_size > 0 {
            // Bounded by FILE_WRITE_BUF_LEN, so the cast back to usize is lossless
            let bytes_to_write = remaining_size.min(FILE_WRITE_BUF_LEN as u64) as usize;

            // `write_all` retries short writes; a bare `write` may write fewer
            // bytes than requested and leave part of the file untouched
            writer.write_all(&buffer[..bytes_to_write])?;

            remaining_size -= bytes_to_write as u64;
        }

        // Flush explicitly: dropping a BufWriter discards flush errors
        writer.flush()?;
        // Ask the OS to push the zeroes to the storage device as well
        writer.get_ref().sync_all()
    }

    // Nothing to wipe unless the database was successfully initialized
    if self.db_ok != Some(true) {
        return;
    }

    // Destroy database instance (prevents writes into file)
    self.db = None;
    self.db_ok = Some(false);

    // Wipe database file (best-effort; failures are deliberately ignored)
    if let Some(db_file_path) = self.db_file_path.as_deref() {
        let _ = overwrite_with_zeroes(db_file_path);
    }
}
}
25 changes: 15 additions & 10 deletions src/core.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use std::env;
use std::error::Error;
use std::fmt;
use std::fs;
use std::io::{self, prelude::*};
use std::io::{self, prelude::*, Write};
use std::path::{Path, PathBuf};
use std::time::Duration;

Expand Down Expand Up @@ -65,7 +66,6 @@ pub struct Options {
pub insecure: bool,
pub isolate: bool,
pub no_audio: bool,
pub no_color: bool,
pub no_css: bool,
pub no_fonts: bool,
pub no_frames: bool,
Expand Down Expand Up @@ -659,24 +659,29 @@ pub fn read_stdin() -> Vec<u8> {
}
}

use std::io::Write;

pub fn print_error_message(text: &str, options: &Options) {
if !options.silent {
let stderr = io::stderr();
let mut handle = stderr.lock();

const ENV_VAR_NO_COLOR: &str = "NO_COLOR";
const ENV_VAR_TERM: &str = "TERM";

let mut no_color =
env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr);
if let Some(term) = env::var_os(ENV_VAR_TERM) {
if term == "dumb" {
no_color = true;
}
}

if handle
.write_all(
format!(
"{}{}{}\n",
if options.no_color { "" } else { ANSI_COLOR_RED },
if no_color { "" } else { ANSI_COLOR_RED },
&text,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
if no_color { "" } else { ANSI_COLOR_RESET },
)
.as_bytes(),
)
Expand Down
37 changes: 17 additions & 20 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use std::env;
use std::fs;
use std::io::{self, Error as IoError, Write};
use std::process;
Expand Down Expand Up @@ -55,16 +54,15 @@ const ASCII: &str = " \
| | \\___/ | | \\ | | | | | | |
|___| |__________| \\_____________________| |___| |___| |___|
";
const CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 50; // Minimum file size for on-disk caching (in bytes)
const CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 10; // Minimum file size for on-disk caching (in bytes)
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
const DEFAULT_USER_AGENT: &str =
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0";
const ENV_VAR_NO_COLOR: &str = "NO_COLOR";
const ENV_VAR_TERM: &str = "TERM";

fn main() {
// Process CLI flags and options
let mut cookie_file_path: Option<String> = None;
let mut exit_code = 0;
let mut options: Options = Options::default();
let source;
let destination;
Expand All @@ -79,7 +77,7 @@ fn main() {
"-B, --blacklist-domains 'Treat list of specified domains as blacklist'",
)
.args_from_usage("-c, --no-css 'Remove CSS'")
.args_from_usage("-C, --cookies=[cookies.txt] 'Specify cookie file'")
.args_from_usage("-C, --cookie-file=[cookies.txt] 'Specify cookie file'")
.arg(
Arg::with_name("domains")
.short('d')
Expand All @@ -104,7 +102,7 @@ fn main() {
.args_from_usage(
"-o, --output=[document.html] 'Write output to <file>, use - for STDOUT'",
)
.args_from_usage("-s, --silent 'Suppress verbosity'")
.args_from_usage("-q, --quiet 'Suppress verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
.args_from_usage("-v, --no-video 'Remove video sources'")
Expand All @@ -128,7 +126,7 @@ fn main() {
}
options.blacklist_domains = app.is_present("blacklist-domains");
options.no_css = app.is_present("no-css");
if let Some(cookie_file) = app.value_of("cookies") {
if let Some(cookie_file) = app.value_of("cookie-file") {
cookie_file_path = Some(cookie_file.to_string());
}
if let Some(encoding) = app.value_of("encoding") {
Expand All @@ -147,7 +145,7 @@ fn main() {
options.insecure = app.is_present("insecure");
options.no_metadata = app.is_present("no-metadata");
destination = app.value_of("output").unwrap_or("").to_string();
options.silent = app.is_present("silent");
options.silent = app.is_present("quiet");
options.timeout = app
.value_of("timeout")
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
Expand All @@ -160,22 +158,14 @@ fn main() {
}
options.unwrap_noscript = app.is_present("unwrap-noscript");
options.no_video = app.is_present("no-video");

options.no_color =
env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr);
if let Some(term) = env::var_os(ENV_VAR_TERM) {
if term == "dumb" {
options.no_color = true;
}
}
}

// Set up cache (attempt to create temporary file)
let temp_cache_file = match Builder::new().prefix("monolith-").tempfile() {
Ok(tempfile) => Some(tempfile),
Err(_) => None,
};
let cache = Cache::new(
let mut cache = Some(Cache::new(
CACHE_ASSET_FILE_SIZE_THRESHOLD,
if temp_cache_file.is_some() {
Some(
Expand All @@ -189,7 +179,7 @@ fn main() {
} else {
None
},
);
));

// Read and parse cookie file
if let Some(opt_cookie_file) = cookie_file_path.clone() {
Expand Down Expand Up @@ -222,7 +212,7 @@ fn main() {
}
}

match create_monolithic_document(source, &options, &mut Some(cache)) {
match create_monolithic_document(source, &options, &mut cache) {
Ok(result) => {
// Define output
let mut output = Output::new(&destination).expect("could not prepare output");
Expand All @@ -233,7 +223,14 @@ fn main() {
Err(error) => {
print_error_message(&format!("Error: {}", error), &options);

process::exit(1);
exit_code = 1;
}
}

// Clean up (shred database file)
cache.unwrap().destroy_database_file();

if exit_code > 0 {
process::exit(exit_code);
}
}