diff --git a/Cargo.lock b/Cargo.lock index 07b0794..8001eb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,12 @@ dependencies = [ "generic-array 0.14.7", ] +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + [[package]] name = "byteorder" version = "1.5.0" @@ -38,6 +44,19 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +[[package]] +name = "console" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b430743a6eb14e9764d4260d4c0d8123087d504eeb9c48f2b2a5e810dd369df4" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -114,6 +133,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "generic-array" version = "0.14.7" @@ -173,6 +198,7 @@ dependencies = [ "git-version", "glob", "hex", + "indicatif", "md-5", "pico-args", "rayon", @@ -196,6 +222,29 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "indicatif" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70a646d946d06bedbbc4cac4c218acf4bbf2d87757a784857025f4d447e4e1cd" +dependencies = [ + "console", + "portable-atomic", + "unicode-width", + "unit-prefix", + "web-time", +] + +[[package]] +name = "js-sys" +version = "0.3.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0b063578492ceec17683ef2f8c5e89121fbd0b172cbc280635ab7567db2738" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + [[package]] name = "keccak" version = "0.1.5" @@ -211,6 +260,12 @@ version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + [[package]] name = "md-5" version = "0.10.6" @@ -221,12 +276,24 @@ dependencies = [ "digest", ] +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + [[package]] name = "pico-args" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + [[package]] name = "proc-macro2" version = "1.0.101" @@ -358,12 +425,92 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "unit-prefix" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" + [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "wasm-bindgen" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e14915cadd45b529bb8d1f343c4ed0ac1de926144b746e2710f9cd05df6603b" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28d1ba982ca7923fd01448d5c30c6864d0a14109560296a162f80f305fb93bb" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c3d463ae3eff775b0c45df9da45d68837702ac35af998361e2c84e7c5ec1b0d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bb4ce89b08211f923caf51d527662b75bdc9c9c7aab40f86dcb9fb85ac552aa" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f143854a3b13752c6950862c906306adb27c7e839f7414cec8fea35beab624c1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "whirlpool" version = "0.10.4" @@ -372,3 +519,18 @@ checksum = "f1ae50671d985c15b3214c7d969b8b520759fb3c8682444bec15ef775335a05c" dependencies = [ "digest", ] + +[[package]] +name = "windows-link" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" + +[[package]] +name = "windows-sys" +version = "0.61.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e201184e40b2ede64bc2ea34968b28e33622acdbbf37104f0e4a33f7abe657aa" +dependencies = [ + "windows-link", +] diff --git a/Cargo.toml b/Cargo.toml index 0f23c30..2f83f90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,3 +31,4 @@ sha3 = { default-features = false, version = ">= 0.10.8" } whirlpool = { default-features = false, version = ">= 0.10.4" } blake2 = { default-features = false, version = ">= 0.10.6" } crc32fast = { default-features = false, version = ">= 1.4.2" } +indicatif = ">= 0.17.9" diff --git a/src/hasher.rs b/src/hasher.rs index 783fab9..c837c5a 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -8,6 +8,10 @@ use digest::{Digest, Output}; use crate::classes::{BasicHash, OutputEncoding}; +/// Buffer size for reading large files in chunks. 32KB provides optimal performance +/// by balancing memory usage with I/O efficiency. Larger buffers reduce syscall overhead +/// but increase memory pressure, while smaller buffers result in more frequent I/O operations. +/// Files ≤32KB are read entirely into memory for maximum performance. const BUFFER_SIZE: usize = 4096 * 8; /// Hash a file using the given hasher as a Digest implementation, eg `Sha1`, `Sha256`, `Sha3_256` diff --git a/src/main.rs b/src/main.rs index 52f2429..e0d0f7a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,7 @@ use std::io::BufRead; use std::str::FromStr; use anyhow::{Result, anyhow}; +use std::time::{Duration, Instant}; //use crate::hasher::hash_file_crc32; use blake2::{Blake2b512, Blake2s256}; @@ -22,6 +23,7 @@ use whirlpool::Whirlpool; use classes::OutputEncoding; use hasher::{file_exists, hash_file_encoded}; +use progress::ProgressManager; use crate::classes::{ BasicHash, ConfigSettings, DEFAULT_HASH, GIT_VERSION_SHORT, HELP, HashAlgorithm, VERSION, @@ -30,6 +32,7 @@ use crate::classes::{ mod classes; mod crc32; mod hasher; +mod progress; mod unit_tests; /// Call the inner worker function, and show help if there is an error @@ -250,7 +253,7 @@ fn get_paths_matching_glob(config: &ConfigSettings) -> Result> { /// output all file hashes matching a pattern, directly to stdout. Single-threaded fn file_hashes_st(config: &ConfigSettings, paths: &[S]) where - S: AsRef + Display, + S: AsRef + Display + Send + Sync, { if config.debug_mode { eprintln!("Single-threaded mode"); @@ -258,7 +261,7 @@ where } for pathstr in paths { - let file_hash = call_hasher(config.algorithm, config.encoding, pathstr); + let file_hash = hash_with_progress(config, pathstr.as_ref().to_string()); match file_hash { Ok(basic_hash) => { @@ -268,7 +271,7 @@ where println!("{basic_hash} {pathstr}"); } } - Err(e) => eprintln!("'{pathstr}' file err {e:?}"), + Err(e) => eprintln!("File error for '{}': {}", pathstr, e), } } } @@ -283,9 +286,28 @@ where eprintln!("Algorithm: {:?}", config.algorithm); } + // For large file sets, show an overall progress bar instead of per-file spinners + let overall_progress = ProgressManager::create_overall_progress(paths.len(), config.debug_mode); + // process the paths in parallel paths.par_iter().for_each(|pathstr| { + let start_time = Instant::now(); let file_hash = call_hasher(config.algorithm, config.encoding, pathstr); + let elapsed = start_time.elapsed(); + + // Update overall progress bar if it exists + if let Some(ref pb) = overall_progress { + pb.inc(1); + } + + // If the operation took more than threshold, mention it in debug mode + if config.debug_mode && elapsed >= Duration::from_secs(ProgressManager::threshold_secs()) { + eprintln!( + "File '{}' took {:.2}s to hash", + pathstr, + elapsed.as_secs_f64() + ); + } match file_hash { Ok(basic_hash) => { @@ -297,9 +319,47 @@ where } // failed to calculate the hash - Err(e) => eprintln!("'{pathstr}' file err {e:?}"), + Err(e) => eprintln!("File error for '{}': {}", pathstr, e), } }); + + // Finish the overall progress bar if it exists + if let Some(pb) = overall_progress { + pb.finish_with_message("Complete!"); + } +} + +/// Hash a file with progress indication for operations taking >1 second +fn hash_with_progress(config: &ConfigSettings, pathstr: S) -> Result +where + S: AsRef + Display + Clone + Send + 'static, +{ + let pathstr_clone = pathstr.clone(); + + // Create progress indication handle + let progress_handle = + ProgressManager::create_file_progress(pathstr_clone.clone(), config.debug_mode); + + // Perform the actual hashing + let start_time = Instant::now(); + let result = call_hasher(config.algorithm, config.encoding, pathstr); + let elapsed = start_time.elapsed(); + + // Signal completion and clean up progress resources + if let Some(handle) = progress_handle { + handle.finish(config.debug_mode); + } + + // Log timing info in debug mode for long operations + if config.debug_mode && elapsed >= Duration::from_secs(ProgressManager::threshold_secs()) { + eprintln!( + "File '{}' took {:.2}s to hash", + pathstr_clone, + elapsed.as_secs_f64() + ); + } + + result } /// calculate the hash of a file using given algorithm @@ -312,7 +372,9 @@ fn call_hasher( if (algo == HashAlgorithm::CRC32 && encoding != OutputEncoding::U32) || (algo != HashAlgorithm::CRC32 && encoding == OutputEncoding::U32) { - return Err(anyhow!("CRC32 can only be output as U32")); + return Err(anyhow!( + "CRC32 must use U32 encoding, and U32 encoding can only be used with CRC32" + )); } match algo { diff --git a/src/progress.rs b/src/progress.rs new file mode 100644 index 0000000..a12fa97 --- /dev/null +++ b/src/progress.rs @@ -0,0 +1,146 @@ +//! Progress indication module for file hashing operations +//! +//! This module provides progress tracking for both single files (with spinners for long operations) +//! and multiple files (with progress bars for large sets). It manages thread safety and resource +//! limits to prevent system exhaustion. + +use indicatif::{ProgressBar, ProgressStyle}; +use std::fmt::Display; +use std::sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + mpsc, +}; +use std::time::Duration; + +// Global counter for active progress threads to prevent resource exhaustion +static ACTIVE_PROGRESS_THREADS: AtomicUsize = AtomicUsize::new(0); +const MAX_PROGRESS_THREADS: usize = 4; +const PROGRESS_THRESHOLD_SECS: u64 = 1; + +/// Handle for a progress indication session +pub struct ProgressHandle { + sender: Option>, + thread_handle: Option>, +} + +impl ProgressHandle { + /// Signal completion and clean up resources + pub fn finish(mut self, debug_mode: bool) { + // Signal completion - this will wake up the progress thread immediately + if let Some(tx) = self.sender.take() { + let _ = tx.send(()); + } + + // Wait for progress thread to finish - simple cleanup + if let Some(handle) = self.thread_handle.take() { + if handle.join().is_err() && debug_mode { + eprintln!("Progress thread join failed"); + } + } + } +} + +/// Manager for progress indication across different operation types +pub struct ProgressManager; + +impl ProgressManager { + /// Create a progress indication for a single file operation + /// Shows a spinner if the operation takes longer than the threshold + pub fn create_file_progress(pathstr: S, debug_mode: bool) -> Option + where + S: AsRef + Display + Clone + Send + 'static, + { + // Only show progress spinners if not in debug mode and we haven't exceeded thread limit + let should_show_progress = + !debug_mode && ACTIVE_PROGRESS_THREADS.load(Ordering::Relaxed) < MAX_PROGRESS_THREADS; + + if !should_show_progress { + return None; + } + + // Create a channel to signal completion + let (tx, rx) = mpsc::channel(); + + // Increment the counter + ACTIVE_PROGRESS_THREADS.fetch_add(1, Ordering::Relaxed); + + let handle = std::thread::spawn(move || { + // Wait for either completion signal or threshold timeout + match rx.recv_timeout(Duration::from_secs(PROGRESS_THRESHOLD_SECS)) { + Ok(()) => { + // Operation completed before threshold, no progress needed + } + Err(mpsc::RecvTimeoutError::Timeout) => { + // Threshold passed, show progress spinner + let pb = Self::create_progress_spinner(pathstr.as_ref()); + if let Some(pb) = pb { + pb.enable_steady_tick(Duration::from_millis(120)); + + // Wait for completion signal + let _ = rx.recv(); + pb.finish_and_clear(); + } + } + Err(mpsc::RecvTimeoutError::Disconnected) => { + // Sender dropped, operation completed + } + } + + // Decrement the counter when thread finishes + ACTIVE_PROGRESS_THREADS.fetch_sub(1, Ordering::Relaxed); + }); + + Some(ProgressHandle { + sender: Some(tx), + thread_handle: Some(handle), + }) + } + + /// Create an overall progress bar for multiple file operations + pub fn create_overall_progress( + file_count: usize, + debug_mode: bool, + ) -> Option> { + // For large file sets, show an overall progress bar instead of per-file spinners + if debug_mode || file_count < 10 { + return None; + } + + let pb = ProgressBar::new(file_count as u64); + let style = ProgressStyle::default_bar() + .template("{bar:40.cyan/blue} {pos}/{len} files ({percent}%) {msg}") + .unwrap_or_else(|_| { + ProgressStyle::default_bar() + .template("{bar:40} {pos}/{len} files") + .unwrap_or_else(|_| ProgressStyle::default_bar()) + }); + pb.set_style(style); + pb.set_message("Processing..."); + Some(Arc::new(pb)) + } + + /// Create a progress spinner with safe error handling + fn create_progress_spinner(pathstr: &str) -> Option { + let pb = ProgressBar::new_spinner(); + + // Use unwrap_or_else to provide fallback template if parsing fails + let style = ProgressStyle::default_spinner() + .template("{spinner:.green} Hashing {msg}...") + .unwrap_or_else(|_| { + // Fallback to simpler template if the main one fails + ProgressStyle::default_spinner() + .template("{spinner} Hashing...") + .unwrap_or_else(|_| ProgressStyle::default_spinner()) + }); + + pb.set_style(style); + pb.set_message(pathstr.to_string()); + Some(pb) + } + + /// Get the progress threshold in seconds + pub fn threshold_secs() -> u64 { + PROGRESS_THRESHOLD_SECS + } +} diff --git a/src/unit_tests.rs b/src/unit_tests.rs index eb43ec2..110744c 100644 --- a/src/unit_tests.rs +++ b/src/unit_tests.rs @@ -1,6 +1,5 @@ #[cfg(test)] use super::*; -use std::str::FromStr; #[test] fn test_parse_hash_algorithm_valid() {