diff --git a/Cargo.lock b/Cargo.lock index 195776d22750..cd0bcb87926c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2680,6 +2680,9 @@ version = "0.1.0" [[package]] name = "std-types" version = "0.1.0" +dependencies = [ + "clap", +] [[package]] name = "string_cache" diff --git a/src/SUMMARY.md b/src/SUMMARY.md index ef9f0cc54b2e..230db771436f 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -118,6 +118,8 @@ - [`HashMap`](std-types/hashmap.md) - [Exercise: Counter](std-types/exercise.md) - [Solution](std-types/solution.md) + - [Exercise: Word Counter](std-types/word_counter.md) + - [Solution](std-types/word_counter_solution.md) - [Standard Library Traits](std-traits.md) - [Comparisons](std-traits/comparisons.md) - [Operators](std-traits/operators.md) diff --git a/src/std-types/Cargo.toml b/src/std-types/Cargo.toml index fe46d703726b..31bc52d2488f 100644 --- a/src/std-types/Cargo.toml +++ b/src/std-types/Cargo.toml @@ -4,6 +4,12 @@ version = "0.1.0" edition = "2021" publish = false +[dependencies] + [[bin]] name = "hashset" path = "exercise.rs" + +[[bin]] +name = "word_counter" +path = "word_counter.rs" diff --git a/src/std-types/word_counter.exe b/src/std-types/word_counter.exe new file mode 100644 index 000000000000..b7727c651e09 Binary files /dev/null and b/src/std-types/word_counter.exe differ diff --git a/src/std-types/word_counter.md b/src/std-types/word_counter.md new file mode 100644 index 000000000000..bb6aab07366f --- /dev/null +++ b/src/std-types/word_counter.md @@ -0,0 +1,57 @@ +--- +minutes: 20 +--- + +# Exercise: Word Counter + +Create a program that counts the frequency of words in a given text. The program +should: + +1. Take a string of text as input +2. Split the text into words (consider words to be separated by whitespace) +3. Count how many times each word appears (case-insensitive) +4. Print the words and their counts in alphabetical order + +Use a `HashMap` to store the word counts. 
+ +## Example + +```rust +{{#include word_counter.rs:main}} +``` + +Expected output of `print_word_counts` for the sentence "The quick brown fox jumps over the lazy dog" (the `main` program shown above prints a longer report, sorted by frequency and followed by statistics): + +``` +brown: 1 +dog: 1 +fox: 1 +jumps: 1 +lazy: 1 +over: 1 +quick: 1 +the: 2 +``` + +## Tasks + +1. Implement the `count_words` function that takes a string slice and returns a + `HashMap<String, usize>` +2. Make the word counting case-insensitive (e.g., "The" and "the" count as the + same word) +3. Implement the `print_word_counts` function that prints the word counts in + alphabetical order +4. Add tests for: + - Empty input + - Simple text with repeated words + - Case-insensitive counting + +## Extension Tasks (Optional) + +1. Add support for reading text from a file +2. Add statistics like total words, unique words, and average word length +3. Find and display the most common words + +```rust,editable +{{#include word_counter.rs:exercise}} +``` diff --git a/src/std-types/word_counter.pdb b/src/std-types/word_counter.pdb new file mode 100644 index 000000000000..ac0b2cefcf6a Binary files /dev/null and b/src/std-types/word_counter.pdb differ diff --git a/src/std-types/word_counter.rs b/src/std-types/word_counter.rs new file mode 100644 index 000000000000..e9bd8f7cd9b2 --- /dev/null +++ b/src/std-types/word_counter.rs @@ -0,0 +1,222 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// ANCHOR: exercise +use std::collections::HashMap; + +/// Count words in the given text and return a map of words to their counts.
+/// Words are treated as case-insensitive. +fn count_words(text: &str) -> HashMap<String, usize> { + let mut word_counts = HashMap::new(); + + for word in text.split_whitespace() { + // Remove any punctuation and convert to lowercase + let word = word + .chars() + .filter(|c| c.is_alphanumeric()) + .collect::<String>() + .to_lowercase(); + + if !word.is_empty() { + *word_counts.entry(word).or_insert(0) += 1; + } + } + + word_counts +} + +/// Print word counts in alphabetical order. +fn print_word_counts(counts: &HashMap<String, usize>) { + let mut words: Vec<_> = counts.keys().collect(); + words.sort(); + + for word in words { + println!("{}: {}", word, counts[word]); + } +} +// ANCHOR_END: exercise + +// ANCHOR: word_counter +/// WordCounter counts the frequency of words in text. +struct WordCounter { + word_counts: HashMap<String, usize>, +} + +impl WordCounter { + /// Create a new WordCounter. + fn new() -> Self { + WordCounter { word_counts: HashMap::new() } + } + + /// Count words in the given text. + fn count_words(&mut self, text: &str) { + for word in text.split_whitespace() { + // Remove any punctuation and convert to lowercase + let word = word + .chars() + .filter(|c| c.is_alphanumeric()) + .collect::<String>() + .to_lowercase(); + + if !word.is_empty() { + *self.word_counts.entry(word).or_insert(0) += 1; + } + } + } + + /// Get the count for a specific word. + fn word_count(&self, word: &str) -> usize { + self.word_counts.get(&word.to_lowercase()).copied().unwrap_or(0) + } + + /// Find the most frequent word(s) and their count. + fn most_frequent(&self) -> Vec<(&str, usize)> { + if self.word_counts.is_empty() { + return Vec::new(); + } + + let max_count = self.word_counts.values().max().unwrap(); + self.word_counts + .iter() + .filter(|(_, &count)| count == *max_count) + .map(|(word, &count)| (word.as_str(), count)) + .collect() + } + + /// Calculate statistics about the words.
+ fn get_statistics(&self) -> WordStatistics { + let total_words: usize = self.word_counts.values().sum(); + let unique_words = self.word_counts.len(); + + let total_length: usize = + self.word_counts.iter().map(|(word, count)| word.chars().count() * count).sum(); + + let avg_word_length = if total_words > 0 { + total_length as f64 / total_words as f64 + } else { + 0.0 + }; + + // Find the most common words (up to 5); ties are broken alphabetically so the result is deterministic + let mut most_common: Vec<_> = self + .word_counts + .iter() + .map(|(word, &count)| (word.to_string(), count)) + .collect(); + most_common.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0))); + most_common.truncate(5); + + WordStatistics { total_words, unique_words, avg_word_length, most_common } + } +} + +/// Statistics about words in a text. +struct WordStatistics { + total_words: usize, + unique_words: usize, + avg_word_length: f64, + most_common: Vec<(String, usize)>, +} +// ANCHOR_END: word_counter + +// ANCHOR: tests +#[cfg(test)] +mod test { + use super::*; + + // Tests for the basic word counting function + #[test] + fn test_count_words_function() { + let text = "The quick brown fox jumps over the lazy dog"; + let counts = count_words(text); + assert_eq!(counts.get("the").copied(), Some(2)); + assert_eq!(counts.get("quick").copied(), Some(1)); + assert_eq!(counts.get("missing").copied(), None); + } + + // Tests for the WordCounter struct + #[test] + fn test_empty_counter() { + let counter = WordCounter::new(); + assert_eq!(counter.word_count("any"), 0); + assert!(counter.most_frequent().is_empty()); + } + + #[test] + fn test_simple_text() { + let mut counter = WordCounter::new(); + counter.count_words("Hello world, hello Rust!"); + assert_eq!(counter.word_count("hello"), 2); + assert_eq!(counter.word_count("rust"), 1); + assert_eq!(counter.word_count("world"), 1); + } + + #[test] + fn test_case_insensitive() { + let mut counter = WordCounter::new(); + counter.count_words("Hello HELLO hello"); + assert_eq!(counter.word_count("hello"), 3); +
assert_eq!(counter.word_count("HELLO"), 3); + } + + #[test] + fn test_most_frequent() { + let mut counter = WordCounter::new(); + counter.count_words("hello world hello rust hello"); + let most_frequent = counter.most_frequent(); + assert_eq!(most_frequent, vec![("hello", 3)]); + } + + // Tests for the statistics functionality + #[test] + fn test_statistics() { + let mut counter = WordCounter::new(); + counter.count_words("hello world hello rust hello programming"); + let stats = counter.get_statistics(); + assert_eq!(stats.total_words, 6); + assert_eq!(stats.unique_words, 4); + assert!(stats.avg_word_length > 0.0); + assert_eq!(stats.most_common[0].0, "hello"); + assert_eq!(stats.most_common[0].1, 3); + } +} +// ANCHOR_END: tests + +// ANCHOR: main +fn main() { + let text = + "The quick brown fox jumps over the lazy dog. The fox is quick and brown."; + let mut counter = WordCounter::new(); + counter.count_words(text); + + println!("Word Counts:"); + let mut words: Vec<_> = counter.word_counts.iter().collect(); + words.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); // Sort by frequency, then alphabetically + + for (word, count) in words { + println!("{}: {}", word, count); + } + + println!("\nStatistics:"); + let stats = counter.get_statistics(); + println!("Total words: {}", stats.total_words); + println!("Unique words: {}", stats.unique_words); + println!("Average word length: {:.2}", stats.avg_word_length); + + println!("\nMost common words:"); + for (word, count) in stats.most_common { + println!("{}: {} occurrences", word, count); + } +} +// ANCHOR_END: main diff --git a/src/std-types/word_counter_project/Cargo.lock b/src/std-types/word_counter_project/Cargo.lock new file mode 100644 index 000000000000..363f9324dd39 --- /dev/null +++ b/src/std-types/word_counter_project/Cargo.lock @@ -0,0 +1,261 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +dependencies = [ + "anstyle", + "once_cell", + "windows-sys", +] + +[[package]] +name = "clap" +version = "4.5.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2df961d8c8a0d08aa9945718ccf584145eee3f3aa06cddbeac12933781102e04" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "132dbda40fb6753878316a489d5a1242a8ef2f0d9e47ba01c951ea8aa7d013a5" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "colored" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +dependencies = [ + "lazy_static", + "windows-sys", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "word_counter" +version = "0.1.0" +dependencies = [ + "clap", + "colored", +] diff --git a/src/std-types/word_counter_project/Cargo.toml b/src/std-types/word_counter_project/Cargo.toml new file mode 100644 index 000000000000..4b6ebd8c84d1 --- /dev/null +++ b/src/std-types/word_counter_project/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "word_counter" +version = "0.1.0" +edition = "2021" + +[workspace] + +[dependencies] +clap = { version = "4.4", features = ["derive"] } +colored = "2.0" \ No newline at end of file diff --git a/src/std-types/word_counter_project/LICENSE b/src/std-types/word_counter_project/LICENSE new file mode 100644 index 
000000000000..8902c0a73195 --- /dev/null +++ b/src/std-types/word_counter_project/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Comprehensive Rust Course Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/src/std-types/word_counter_project/README.md b/src/std-types/word_counter_project/README.md new file mode 100644 index 000000000000..abcd770478ce --- /dev/null +++ b/src/std-types/word_counter_project/README.md @@ -0,0 +1,73 @@ +# Word Counter + +A Rust program that analyzes word frequencies in text, providing detailed statistics and various input methods. 
+ +## Features + +- Multiple input methods: + - Direct text input + - File input + - Interactive mode +- Case-sensitive or case-insensitive word counting +- Detailed statistics: + - Total word count + - Unique word count + - Average word length + - Most common words +- Colored output for better readability +- Error handling for file operations + +## Usage + +```bash +# Count words in text +cargo run -- --text "Your text here" + +# Count words from a file +cargo run -- --file "path/to/your/file.txt" + +# Use interactive mode +cargo run -- --interactive + +# Show detailed statistics +cargo run -- --text "Your text here" --stats + +# Enable case-sensitive counting +cargo run -- --text "Your text here" --case-sensitive +``` + +### Command Line Options + +- `--text, -t`: Directly input text to analyze +- `--file, -f`: Specify a file to read text from +- `--interactive, -i`: Enter interactive mode for continuous input +- `--case-sensitive, -c`: Enable case-sensitive word counting +- `--stats, -s`: Show detailed statistics + +## Installation + +1. Make sure you have Rust installed +2. Clone the repository +3. Run `cargo build --release` +4. The binary will be available in `target/release/word_counter` + +## Example + +```bash +cargo run -- --text "The quick brown fox jumps over the lazy dog" --stats +``` + +This will output: +- Word frequency counts +- Total word count +- Number of unique words +- Average word length +- Most common words + +## Contributing + +This is part of the Comprehensive Rust course. Feel free to submit issues and enhancement requests. + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. 
\ No newline at end of file diff --git a/src/std-types/word_counter_project/src/main.rs b/src/std-types/word_counter_project/src/main.rs new file mode 100644 index 000000000000..edb675ec48ff --- /dev/null +++ b/src/std-types/word_counter_project/src/main.rs @@ -0,0 +1,151 @@ +use std::collections::HashMap; +use std::fs; +use std::io; +use clap::Parser; +use colored::*; + +/// Word Counter - A program to analyze word frequencies in text +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Text to analyze directly + #[arg(short, long)] + text: Option<String>, + + /// File to read text from + #[arg(short, long)] + file: Option<String>, + + /// Read from standard input + #[arg(short, long)] + interactive: bool, + + /// Case sensitive counting + #[arg(short, long)] + case_sensitive: bool, + + /// Show detailed statistics + #[arg(short, long)] + stats: bool, +} + +#[derive(Default)] +struct Statistics { + total_words: usize, + unique_words: usize, + avg_word_length: f64, + most_common: Vec<(String, usize)>, +} + +struct WordCounter { + word_counts: HashMap<String, usize>, + case_sensitive: bool, +} + +impl WordCounter { + fn new(case_sensitive: bool) -> Self { + WordCounter { + word_counts: HashMap::new(), + case_sensitive, + } + } + + fn count_words(&mut self, text: &str) { + for word in text.split_whitespace() { + let word = if !self.case_sensitive { + word.to_lowercase() + } else { + word.to_string() + }; + *self.word_counts.entry(word).or_insert(0) += 1; + } + } + + fn get_statistics(&self) -> Statistics { + let total_words: usize = self.word_counts.values().sum(); + let unique_words = self.word_counts.len(); + + let total_length: usize = self.word_counts + .iter() + .map(|(word, count)| word.chars().count() * count) // characters, not UTF-8 bytes + .sum(); + + let avg_word_length = if total_words > 0 { + total_length as f64 / total_words as f64 + } else { + 0.0 + }; + + let mut most_common: Vec<_> = self.word_counts + .iter() + .map(|(word, &count)| (word.clone(), count)) + .collect(); +
most_common.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0))); // break ties alphabetically so the top five are deterministic + most_common.truncate(5); + + Statistics { + total_words, + unique_words, + avg_word_length, + most_common, + } + } + + fn print_results(&self, show_stats: bool) { + println!("\n{}", "Word Counts:".green().bold()); + let mut words: Vec<_> = self.word_counts.iter().collect(); + words.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); + + for (word, count) in words { + println!("{}: {}", word.cyan(), count.to_string().yellow()); + } + + if show_stats { + let stats = self.get_statistics(); + println!("\n{}", "Statistics:".green().bold()); + println!("Total words: {}", stats.total_words.to_string().yellow()); + println!("Unique words: {}", stats.unique_words.to_string().yellow()); + println!("Average word length: {}", format!("{:.2}", stats.avg_word_length).yellow()); // round the number first; a precision applied to a string truncates it + + println!("\n{}", "Most common words:".green().bold()); + for (word, count) in stats.most_common { + println!("{}: {} occurrences", word.cyan(), count.to_string().yellow()); + } + } + } +} + +fn interactive_mode(counter: &mut WordCounter, _show_stats: bool) { + println!("{}", "\nInteractive Mode - Enter text (press Ctrl+D or Ctrl+Z to finish):".green().bold()); + let stdin = io::stdin(); + let mut buffer = String::new(); + + while stdin.read_line(&mut buffer).unwrap_or(0) > 0 { + counter.count_words(&buffer); + buffer.clear(); + } +} + +fn main() { + let args = Args::parse(); + let mut counter = WordCounter::new(args.case_sensitive); // pass the flag through unchanged; negating it made the default case-sensitive + + if args.interactive { + interactive_mode(&mut counter, args.stats); + } else if let Some(text) = args.text { + counter.count_words(&text); + } else if let Some(file) = args.file { + match fs::read_to_string(file) { + Ok(content) => counter.count_words(&content), + Err(e) => { + eprintln!("{}: {}", "Error reading file".red().bold(), e); + std::process::exit(1); + } + } + } else { + eprintln!("{}", "Please provide --text, --file, or use --interactive mode".red().bold()); + std::process::exit(1); + } + +
counter.print_results(args.stats); +} \ No newline at end of file diff --git a/src/std-types/word_counter_solution.md b/src/std-types/word_counter_solution.md new file mode 100644 index 000000000000..ffd4fa4f3996 --- /dev/null +++ b/src/std-types/word_counter_solution.md @@ -0,0 +1,60 @@ +# Solution: Word Counter + +Here's a solution for the Word Counter exercise using a `HashMap` to track word +frequencies: + +```rust +{{#include word_counter.rs:exercise}} +``` + +This solution: + +1. Implements `count_words` which: + - Creates an empty `HashMap` to store word counts + - Iterates through words in the input text (split by whitespace) + - Converts each word to lowercase for case-insensitive counting + - Uses the `entry` API to increment the count for each word + +2. Implements `print_word_counts` which: + - Collects and sorts the keys (words) alphabetically + - Prints each word with its count + +## Advanced Implementation + +For more advanced needs, a `WordCounter` struct provides additional +functionality: + +```rust +{{#include word_counter.rs:word_counter}} +``` + +The struct-based approach offers more methods like `most_frequent()` which finds +the most common word(s) in the text and `get_statistics()` which calculates +various metrics about the words. + +## Statistics Example + +The `WordStatistics` struct and the `get_statistics()` method show how to +calculate: + +- Total word count +- Unique word count +- Average word length +- Most frequently used words + +## Key Learning Points + +1. **Using HashMaps**: The solution demonstrates how to use a HashMap to + associate words with their counts. + +2. **Entry API**: The code uses `entry().or_insert(0)` to efficiently handle the + case where a word is seen for the first time. + +3. **String manipulation**: Words are converted to lowercase for + case-insensitive comparison and punctuation is filtered out. + +4. 
**Sorting and iteration**: The solution shows how to collect keys from a + HashMap, sort them, and iterate through them. + +5. **Testing**: The comprehensive tests verify word counting, + case-insensitivity, punctuation handling, and statistics calculations. diff --git a/src/std-types/word_counter_solution.rs b/src/std-types/word_counter_solution.rs new file mode 100644 index 000000000000..f07f12f000a3 --- /dev/null +++ b/src/std-types/word_counter_solution.rs @@ -0,0 +1,50 @@ +use std::collections::HashMap; + +/// Count the frequency of words in a text. +/// Returns a HashMap with words as keys and their counts as values. +fn count_words(text: &str) -> HashMap<String, usize> { + let mut word_counts = HashMap::new(); + + for word in text.split_whitespace() { + let word = word.to_lowercase(); + *word_counts.entry(word).or_insert(0) += 1; + } + + word_counts +} + +/// Print word counts in alphabetical order +fn print_word_counts(counts: &HashMap<String, usize>) { + let mut words: Vec<_> = counts.keys().collect(); + words.sort(); + + for word in words { + println!("{}: {}", word, counts[word]); + } +} + +#[test] +fn test_empty_string() { + let counts = count_words(""); + assert!(counts.is_empty()); +} + +#[test] +fn test_simple_text() { + let counts = count_words("the quick brown fox jumps over the lazy dog"); + assert_eq!(counts["the"], 2); + assert_eq!(counts["fox"], 1); + assert_eq!(counts.len(), 8); +} + +#[test] +fn test_case_insensitive() { + let counts = count_words("The THE the"); + assert_eq!(counts["the"], 3); +} + +fn main() { + let text = "the quick brown fox jumps over the lazy dog"; + let counts = count_words(text); + print_word_counts(&counts); +} \ No newline at end of file diff --git a/src/std-types/word_counter_test b/src/std-types/word_counter_test new file mode 100644 index 000000000000..1fc5a67cbf53 Binary files /dev/null and b/src/std-types/word_counter_test differ diff --git a/src/std-types/word_counter_test.pdb b/src/std-types/word_counter_test.pdb new file mode 100644
index 000000000000..3ba90acc30d6 Binary files /dev/null and b/src/std-types/word_counter_test.pdb differ diff --git a/test-exercise/main.rs b/test-exercise/main.rs new file mode 100644 index 000000000000..0fc23c7c7f87 Binary files /dev/null and b/test-exercise/main.rs differ