Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions module2/duplicate_text_detection/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[package]
name = "duplicate_text_detection"
version = "0.1.0"
edition = "2021"

[dependencies]
sha3 = "0.10.8"
rand = "0.8.4"
hex = "0.4.3"
38 changes: 38 additions & 0 deletions module2/duplicate_text_detection/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
SHELL := /bin/bash

# None of these targets produce a file of the same name. Declaring them phony
# keeps make from skipping one when a file or directory called e.g. `test` or
# `build` happens to exist in the project directory.
.PHONY: help clean build run test lint format release all bump

# Lists every target annotated with a trailing `## description`, sorted by name.
help:
	@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'

clean: ## Clean the project using cargo
	cargo clean

build: ## Build the project using cargo
	cargo build

run: ## Run the project using cargo
	cargo run

test: ## Run the tests using cargo
	cargo test

lint: ## Run the linter using cargo
	@rustup component add clippy 2> /dev/null
	cargo clippy

format: ## Format the code using cargo
	@rustup component add rustfmt 2> /dev/null
	cargo fmt

release: ## Build the project in release mode using cargo
	cargo build --release

all: format lint test run ## Format, lint, test and run the project

# Prompts for a new version and writes it to the `version` key of the
# [package] section in Cargo.toml.
#
# - The entered value is restricted to characters that can appear in a semver
#   string, so it cannot break out of the sed replacement.
# - `sed -i.bak` followed by `rm -f` is the form of in-place editing that works
#   with both GNU and BSD sed. A bare `-i -E` leaves no backup on GNU and a
#   differently named one on BSD.
# - The address range limits the substitution to lines between `[package]` and
#   the next table header, so `version = ...` keys in other tables are left alone.
bump: ## Bump the version of the project
	@echo "Current version is $$(cargo pkgid | cut -d# -f2)"
	@read -r -p "Enter the new version: " version; \
	case "$$version" in \
		''|*[!0-9A-Za-z.+-]*) echo "Invalid version: '$$version'" >&2; exit 1 ;; \
	esac; \
	sed -i.bak -E "/^\[package\]/,/^\[/ s/^version = .*/version = \"$$version\"/" Cargo.toml \
		&& rm -f Cargo.toml.bak
	@echo "Version bumped to $$(cargo pkgid | cut -d# -f2)"
67 changes: 67 additions & 0 deletions module2/duplicate_text_detection/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use rand::prelude::SliceRandom;
use rand::thread_rng;
use rand::Rng;
use sha3::Digest;
use sha3::Sha3_256;
use std::collections::HashMap;

/// Fixed pool of ten source phrases.
///
/// `generate_random_phrases` emits one or more copies of each entry, so every
/// phrase listed here appears in its output at least once.
static PHRASES: [&str; 10] = [
"Be curious, not judgmental.",
"Football is life, but it's not the life.",
"Believe.",
"I believe in communism. Rom-communism, that is.",
"You know what the happiest animal on Earth is? A goldfish. You know why? Got a 10-second memory. Be a goldfish, Sam.",
"I think that you might be so sure a person is one thing, that sometimes you completely miss who they really are.",
"I promise you, there is something worse out there than being sad, and that's being alone and being sad. Ain't no one in this room alone.",
"Winning isn't everything, but wanting to win is.",
"It's important to find people who challenge and inspire you, people who care about you and push you to be your best. And remember, it's okay to ask for help.",
"I'm like an incomplete list of Madeline Kahn's best films. I ain't got no clue.",
];

/// Builds a shuffled list in which every entry of `PHRASES` appears one to three times.
///
/// For each phrase a copy count is drawn at random from `1..=3`; once all copies
/// have been collected the whole list is shuffled. Both the length of the result
/// and the order of its elements therefore vary from call to call.
pub fn generate_random_phrases() -> Vec<&'static str> {
    let mut rng = thread_rng();
    let mut pool = Vec::new();

    for &quote in &PHRASES {
        // One draw per phrase decides how many times it is repeated.
        let repeats = rng.gen_range(1..=3);
        pool.extend((0..repeats).map(|_| quote));
    }

    pool.shuffle(&mut rng);
    pool
}

/// Prints a duplicate report for `phrases` to standard output.
///
/// Phrases are grouped by the SHA3-256 digest of their bytes. The report consists of:
///
/// 1. the total number of phrases received,
/// 2. one line per digest that occurred more than once, showing the hex-encoded
///    digest, the occurrence count and the phrase text (the order of these lines
///    follows the map's iteration order and is not stable between runs),
/// 3. the number of distinct digests, the number of digests that occurred more
///    than once, and the number of surplus copies (occurrences beyond the first).
pub fn analyze_duplicates(phrases: &[&str]) {
    println!("Total number of phrases: {}", phrases.len());

    // digest -> (occurrences, first phrase seen with that digest)
    let mut tally: HashMap<_, (usize, &str)> = HashMap::new();
    for &text in phrases {
        let digest = Sha3_256::digest(text.as_bytes());
        tally.entry(digest).or_insert((0, text)).0 += 1;
    }

    let mut repeated_phrases = 0;
    let mut surplus_copies = 0;

    for (digest, &(occurrences, text)) in &tally {
        if occurrences <= 1 {
            continue;
        }
        repeated_phrases += 1;
        // The first occurrence is the original; only the rest count as duplicates.
        surplus_copies += occurrences - 1;
        println!("{} - {} times: {}", hex::encode(digest), occurrences, text);
    }

    println!("Total Unique Phrases: {}", tally.len());
    println!("Total Unique Duplicates: {}", repeated_phrases);
    println!("Total Combined Duplicates: {}", surplus_copies);
}
27 changes: 27 additions & 0 deletions module2/duplicate_text_detection/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
Generates random duplicate phrases from a list of phrases
and prints the number of unique phrases and the number of duplicate phrases.

Example output:

Total number of phrases: 23
2a2e73e2a2d6b56eee4c5c8ad738020d2434a2af922e28293ae7911ae7bddcb2 - 3 times: Believe.
e55ec34eff9881511727b17a583919b6e0e49e15799693a3aa000b1428a35f9d - 2 times: Winning isn't everything, but wanting to win is.
f61635abca438d9ea7576b240a6d04163c4a2a33d9aced6feab999feb31cf8d1 - 2 times: Football is life, but it's not the life.
6a4f1a4e33e60565fe9be08113b3700cd87b4e5b9b99a2cd73d8cc679b070f01 - 3 times: I think that you might be so sure a person is one thing, that sometimes you completely miss who they really are.
7501e06b5c21dd83151e3b924e9513e937cc1f083faf017987ba2a4e982536d4 - 2 times: It's important to find people who challenge and inspire you, people who care about you and push you to be your best. And remember, it's okay to ask for help.
2a77903cddd57b9612116009d78845025a577b18ffabfa1418e25cb9adc21ddb - 2 times: Be curious, not judgmental.
b0a8c43b81216d3c2226dd492561d6db50adcdd9eccf7a1b1c9bc0588a76cae2 - 3 times: I'm like an incomplete list of Madeline Kahn's best films. I ain't got no clue.
eeb975a58d29186433e4edbc45cf57869b56cc12dec28addada2961e706f548c - 2 times: I promise you, there is something worse out there than being sad, and that's being alone and being sad. Ain't no one in this room alone.
8ea1b6258b0e074076d7bb75d5c8e8bdf5d571efac0a1ba2370083214d08fd3c - 3 times: You know what the happiest animal on Earth is? A goldfish. You know why? Got a 10-second memory. Be a goldfish, Sam.
Total Unique Phrases: 10
Total Unique Duplicates: 9
Total Combined Duplicates: 13

*/
use duplicate_text_detection::generate_random_phrases;

/// Generates a randomized phrase list and prints its duplicate report.
fn main() {
    duplicate_text_detection::analyze_duplicates(&generate_random_phrases());
}
Loading