diff --git a/module2/duplicate_text_detection/Cargo.toml b/module2/duplicate_text_detection/Cargo.toml
new file mode 100644
index 0000000..abd7a14
--- /dev/null
+++ b/module2/duplicate_text_detection/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "duplicate_text_detection"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+sha3 = "0.10.8"
+rand = "0.8.4"
+hex = "0.4.3"
diff --git a/module2/duplicate_text_detection/Makefile b/module2/duplicate_text_detection/Makefile
new file mode 100644
index 0000000..4daa6f8
--- /dev/null
+++ b/module2/duplicate_text_detection/Makefile
@@ -0,0 +1,37 @@
+SHELL := /bin/bash
+.PHONY: help clean build run test lint format release all bump
+
+help: ## Show this help
+	@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
+
+clean: ## Clean the project using cargo
+	cargo clean
+
+build: ## Build the project using cargo
+	cargo build
+
+run: ## Run the project using cargo
+	cargo run
+
+test: ## Run the tests using cargo
+	cargo test
+
+lint: ## Run the linter using cargo
+	@rustup component add clippy 2> /dev/null
+	cargo clippy
+
+format: ## Format the code using cargo
+	@rustup component add rustfmt 2> /dev/null
+	cargo fmt
+
+release: ## Build the project in release mode using cargo
+	cargo build --release
+
+all: format lint test run ## Format, lint, test and run the project
+
+bump: ## Bump the version of the project
+	@echo "Current version is $(shell cargo pkgid | cut -d# -f2)"
+	@read -p "Enter the new version: " version; \
+	[ -n "$$version" ] || { echo "No version entered; aborting." >&2; exit 1; }; \
+	sed -E -i.bak "s/^version = .*/version = \"$$version\"/" Cargo.toml && rm -f Cargo.toml.bak
+	@echo "Version bumped to $$(cargo pkgid | cut -d# -f2)"
diff --git a/module2/duplicate_text_detection/src/lib.rs b/module2/duplicate_text_detection/src/lib.rs
new file mode 100644
index 0000000..88dc917
--- /dev/null
+++ b/module2/duplicate_text_detection/src/lib.rs
@@ -0,0 +1,76 @@
+//! Detects duplicated phrases by hashing each one with SHA3-256.
+
+use rand::prelude::SliceRandom;
+use rand::thread_rng;
+use rand::Rng;
+use sha3::Digest;
+use sha3::Sha3_256;
+use std::collections::HashMap;
+
+/// Pool of phrases that the random samples are drawn from.
+static PHRASES: [&str; 10] = [
+    "Be curious, not judgmental.",
+    "Football is life, but it's not the life.",
+    "Believe.",
+    "I believe in communism. Rom-communism, that is.",
+    "You know what the happiest animal on Earth is? A goldfish. You know why? Got a 10-second memory. Be a goldfish, Sam.",
+    "I think that you might be so sure a person is one thing, that sometimes you completely miss who they really are.",
+    "I promise you, there is something worse out there than being sad, and that's being alone and being sad. Ain't no one in this room alone.",
+    "Winning isn't everything, but wanting to win is.",
+    "It's important to find people who challenge and inspire you, people who care about you and push you to be your best. And remember, it's okay to ask for help.",
+    "I'm like an incomplete list of Madeline Kahn's best films. I ain't got no clue.",
+];
+
+/// Builds a shuffled list holding one to three copies of every phrase in `PHRASES`.
+///
+/// The result therefore contains between 10 and 30 entries, in random order.
+pub fn generate_random_phrases() -> Vec<&'static str> {
+    let mut rng = thread_rng();
+    // Reserve for the worst case: every phrase repeated three times.
+    let mut phrases = Vec::with_capacity(PHRASES.len() * 3);
+
+    for &phrase in PHRASES.iter() {
+        let copies = rng.gen_range(1..=3);
+        phrases.extend(std::iter::repeat(phrase).take(copies));
+    }
+
+    phrases.shuffle(&mut rng);
+
+    phrases
+}
+
+/// Prints a duplicate report for `phrases` to standard output.
+///
+/// Phrases are grouped by the SHA3-256 digest of their bytes. The report shows the total
+/// number of phrases, one line (hex digest, occurrence count, text) for every phrase seen
+/// more than once, and three summary counters: distinct phrases, distinct phrases that have
+/// duplicates, and redundant copies (occurrences beyond the first of each phrase).
+///
+/// Report lines follow `HashMap` iteration order, so their order varies between runs.
+pub fn analyze_duplicates(phrases: &[&str]) {
+    // digest -> (occurrence count, first phrase seen with that digest)
+    let mut hashes: HashMap<_, (usize, &str)> = HashMap::new();
+    println!("Total number of phrases: {}", phrases.len());
+
+    for &phrase in phrases {
+        let hash = Sha3_256::digest(phrase.as_bytes());
+        hashes.entry(hash).or_insert((0, phrase)).0 += 1;
+    }
+
+    let total_unique_phrases = hashes.len();
+
+    let mut total_unique_duplicates = 0;
+    let mut total_combined_duplicates = 0;
+
+    for (hash, &(count, phrase)) in &hashes {
+        if count > 1 {
+            total_unique_duplicates += 1;
+            total_combined_duplicates += count - 1; // subtract one to exclude the original
+            println!("{} - {} times: {}", hex::encode(hash), count, phrase);
+        }
+    }
+
+    println!("Total Unique Phrases: {}", total_unique_phrases);
+    println!("Total Unique Duplicates: {}", total_unique_duplicates);
+    println!("Total Combined Duplicates: {}", total_combined_duplicates);
+}
diff --git a/module2/duplicate_text_detection/src/main.rs b/module2/duplicate_text_detection/src/main.rs
new file mode 100644
index 0000000..3e441c2
--- /dev/null
+++ b/module2/duplicate_text_detection/src/main.rs
@@ -0,0 +1,27 @@
+/*
+Generates random duplicate phrases from a list of phrases
+and prints the number of unique phrases and the number of duplicate phrases.
+
+Example output (counts and line order vary from run to run):
+
+Total number of phrases: 23
+2a2e73e2a2d6b56eee4c5c8ad738020d2434a2af922e28293ae7911ae7bddcb2 - 3 times: Believe.
+e55ec34eff9881511727b17a583919b6e0e49e15799693a3aa000b1428a35f9d - 2 times: Winning isn't everything, but wanting to win is.
+f61635abca438d9ea7576b240a6d04163c4a2a33d9aced6feab999feb31cf8d1 - 2 times: Football is life, but it's not the life.
+6a4f1a4e33e60565fe9be08113b3700cd87b4e5b9b99a2cd73d8cc679b070f01 - 3 times: I think that you might be so sure a person is one thing, that sometimes you completely miss who they really are.
+7501e06b5c21dd83151e3b924e9513e937cc1f083faf017987ba2a4e982536d4 - 2 times: It's important to find people who challenge and inspire you, people who care about you and push you to be your best. And remember, it's okay to ask for help.
+2a77903cddd57b9612116009d78845025a577b18ffabfa1418e25cb9adc21ddb - 2 times: Be curious, not judgmental.
+b0a8c43b81216d3c2226dd492561d6db50adcdd9eccf7a1b1c9bc0588a76cae2 - 3 times: I'm like an incomplete list of Madeline Kahn's best films. I ain't got no clue.
+eeb975a58d29186433e4edbc45cf57869b56cc12dec28addada2961e706f548c - 2 times: I promise you, there is something worse out there than being sad, and that's being alone and being sad. Ain't no one in this room alone.
+8ea1b6258b0e074076d7bb75d5c8e8bdf5d571efac0a1ba2370083214d08fd3c - 3 times: You know what the happiest animal on Earth is? A goldfish. You know why? Got a 10-second memory. Be a goldfish, Sam.
+Total Unique Phrases: 10
+Total Unique Duplicates: 9
+Total Combined Duplicates: 13
+
+*/
+use duplicate_text_detection::{analyze_duplicates, generate_random_phrases};
+
+fn main() {
+    let phrases = generate_random_phrases();
+    analyze_duplicates(&phrases);
+}