From 3c201a6617ebfb626a0df4f33ea344d2d06a75d7 Mon Sep 17 00:00:00 2001 From: rkt-1597 Date: Sun, 20 Oct 2024 07:54:22 +0530 Subject: [PATCH 1/6] Added script and README file for Word Frequency Counter --- Word_frequency_counter/README.md | 29 +++++++++++++++++++++++++++++ Word_frequency_counter/main.py | 27 +++++++++++++++++++++++++++ Word_frequency_counter/runtime.txt | 1 + 3 files changed, 57 insertions(+) create mode 100644 Word_frequency_counter/README.md create mode 100644 Word_frequency_counter/main.py create mode 100644 Word_frequency_counter/runtime.txt diff --git a/Word_frequency_counter/README.md b/Word_frequency_counter/README.md new file mode 100644 index 0000000..679d125 --- /dev/null +++ b/Word_frequency_counter/README.md @@ -0,0 +1,29 @@ +# Word Frequency Counter + +A simple python script that counts the number of words in a given text document and prints the top 10 words according to their frequency, along with their frequency of occurence. + +--- + +- **Input :** Path of the text file to be processed +- **Output :** List of top 10 words according to their frequency, along with their frequency of occurence. + +--- + +## Features : + +- User friendly interface +- Output is in tabular format +- Case insensitive processing of words +- Get the Top 10 words in the text file which occur most frequntly, along with their counts + +--- + +## Usage : + +1. Clone the repository +2. Navigate to the project folder +3. Run the command : + +```python +python3 main.py +``` \ No newline at end of file diff --git a/Word_frequency_counter/main.py b/Word_frequency_counter/main.py new file mode 100644 index 0000000..8f41a33 --- /dev/null +++ b/Word_frequency_counter/main.py @@ -0,0 +1,27 @@ +import re +from collections import Counter + +def find_words_frequency(file_path): + ''' + This script takes the path of the text file to be processed, as input (argument) + and prints the top ten words and also prints their counts in given text file. 
+ ''' + with open(file_path, 'r', encoding='utf-8') as file: + text = file.read().lower() + + all_words = re.findall(r'\b\w+\b', text) + word_frequency = Counter(all_words) + most_common_words = word_frequency.most_common(10) + + # Print in tabular format + print(f"{'Word':<15} {'Count':<5}") + print("-" * 20) + for word, count in most_common_words: + print(f"{word:<15} {count:<5}") + +def main(): + file_path = input("Enter the path of file : ") + find_words_frequency(file_path) + +if __name__ == "__main__": + main() diff --git a/Word_frequency_counter/runtime.txt b/Word_frequency_counter/runtime.txt new file mode 100644 index 0000000..f023023 --- /dev/null +++ b/Word_frequency_counter/runtime.txt @@ -0,0 +1 @@ +python-3.10.7 \ No newline at end of file From aec98e04057849df324991e5736cdaa4016efead Mon Sep 17 00:00:00 2001 From: rkt-1597 Date: Sun, 20 Oct 2024 10:14:58 +0530 Subject: [PATCH 2/6] Without addition --- Word_frequency_counter/README.md | 29 ----------------------------- Word_frequency_counter/main.py | 27 --------------------------- Word_frequency_counter/runtime.txt | 1 - 3 files changed, 57 deletions(-) delete mode 100644 Word_frequency_counter/README.md delete mode 100644 Word_frequency_counter/main.py delete mode 100644 Word_frequency_counter/runtime.txt diff --git a/Word_frequency_counter/README.md b/Word_frequency_counter/README.md deleted file mode 100644 index 679d125..0000000 --- a/Word_frequency_counter/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Word Frequency Counter - -A simple python script that counts the number of words in a given text document and prints the top 10 words according to their frequency, along with their frequency of occurence. - ---- - -- **Input :** Path of the text file to be processed -- **Output :** List of top 10 words according to their frequency, along with their frequency of occurence. 
- ---- - -## Features : - -- User friendly interface -- Output is in tabular format -- Case insensitive processing of words -- Get the Top 10 words in the text file which occur most frequntly, along with their counts - ---- - -## Usage : - -1. Clone the repository -2. Navigate to the project folder -3. Run the command : - -```python -python3 main.py -``` \ No newline at end of file diff --git a/Word_frequency_counter/main.py b/Word_frequency_counter/main.py deleted file mode 100644 index 8f41a33..0000000 --- a/Word_frequency_counter/main.py +++ /dev/null @@ -1,27 +0,0 @@ -import re -from collections import Counter - -def find_words_frequency(file_path): - ''' - This script takes the path of the text file to be processed, as input (argument) - and prints the top ten words and also prints their counts in given text file. - ''' - with open(file_path, 'r', encoding='utf-8') as file: - text = file.read().lower() - - all_words = re.findall(r'\b\w+\b', text) - word_frequency = Counter(all_words) - most_common_words = word_frequency.most_common(10) - - # Print in tabular format - print(f"{'Word':<15} {'Count':<5}") - print("-" * 20) - for word, count in most_common_words: - print(f"{word:<15} {count:<5}") - -def main(): - file_path = input("Enter the path of file : ") - find_words_frequency(file_path) - -if __name__ == "__main__": - main() diff --git a/Word_frequency_counter/runtime.txt b/Word_frequency_counter/runtime.txt deleted file mode 100644 index f023023..0000000 --- a/Word_frequency_counter/runtime.txt +++ /dev/null @@ -1 +0,0 @@ -python-3.10.7 \ No newline at end of file From af07369341fd191ffdebe10ae7e1fadf75d90df7 Mon Sep 17 00:00:00 2001 From: rkt-1597 Date: Sun, 20 Oct 2024 10:15:50 +0530 Subject: [PATCH 3/6] Added script and README for Word frequency counter --- Word_frequency_counter/README.md | 29 +++++++++++++++++++++++++++++ Word_frequency_counter/main.py | 27 +++++++++++++++++++++++++++ Word_frequency_counter/runtime.txt | 1 + 3 files changed, 57 
insertions(+) create mode 100644 Word_frequency_counter/README.md create mode 100644 Word_frequency_counter/main.py create mode 100644 Word_frequency_counter/runtime.txt diff --git a/Word_frequency_counter/README.md b/Word_frequency_counter/README.md new file mode 100644 index 0000000..679d125 --- /dev/null +++ b/Word_frequency_counter/README.md @@ -0,0 +1,29 @@ +# Word Frequency Counter + +A simple Python script that counts the number of words in a given text document and prints the top 10 words according to their frequency, along with their frequency of occurrence. + +--- + +- **Input :** Path of the text file to be processed +- **Output :** List of top 10 words according to their frequency, along with their frequency of occurrence. + +--- + +## Features : + +- User friendly interface +- Output is in tabular format +- Case insensitive processing of words +- Get the Top 10 words in the text file which occur most frequently, along with their counts + +--- + +## Usage : + +1. Clone the repository +2. Navigate to the project folder +3. Run the command : + +```python +python3 main.py +``` \ No newline at end of file diff --git a/Word_frequency_counter/main.py b/Word_frequency_counter/main.py new file mode 100644 index 0000000..8f41a33 --- /dev/null +++ b/Word_frequency_counter/main.py @@ -0,0 +1,27 @@ +import re +from collections import Counter + +def find_words_frequency(file_path): + ''' + This script takes the path of the text file to be processed, as input (argument) + and prints the top ten words and also prints their counts in given text file.
+ ''' + with open(file_path, 'r', encoding='utf-8') as file: + text = file.read().lower() + + all_words = re.findall(r'\b\w+\b', text) + word_frequency = Counter(all_words) + most_common_words = word_frequency.most_common(10) + + # Print in tabular format + print(f"{'Word':<15} {'Count':<5}") + print("-" * 20) + for word, count in most_common_words: + print(f"{word:<15} {count:<5}") + +def main(): + file_path = input("Enter the path of file : ") + find_words_frequency(file_path) + +if __name__ == "__main__": + main() diff --git a/Word_frequency_counter/runtime.txt b/Word_frequency_counter/runtime.txt new file mode 100644 index 0000000..f023023 --- /dev/null +++ b/Word_frequency_counter/runtime.txt @@ -0,0 +1 @@ +python-3.10.7 \ No newline at end of file From f1c67e8baf94b2ce9306ededb1f5ec549b29144c Mon Sep 17 00:00:00 2001 From: rkt-1597 Date: Thu, 24 Oct 2024 20:37:37 +0530 Subject: [PATCH 4/6] Added requirments.txt file --- Word_frequency_counter/main.py | 9 +++++---- Word_frequency_counter/requirments.txt | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 Word_frequency_counter/requirments.txt diff --git a/Word_frequency_counter/main.py b/Word_frequency_counter/main.py index 8f41a33..c0b39ad 100644 --- a/Word_frequency_counter/main.py +++ b/Word_frequency_counter/main.py @@ -1,15 +1,16 @@ -import re +import regex as re from collections import Counter def find_words_frequency(file_path): ''' - This script takes the path of the text file to be processed, as input (argument) - and prints the top ten words and also prints their counts in given text file. + This script takes the path of the text file to be processed as input + and prints the top ten words and also prints their counts in the given text file. 
''' with open(file_path, 'r', encoding='utf-8') as file: text = file.read().lower() - all_words = re.findall(r'\b\w+\b', text) + # Use `regex`'s findall function + all_words = re.findall(r'\b\p{L}+\b', text) word_frequency = Counter(all_words) most_common_words = word_frequency.most_common(10) diff --git a/Word_frequency_counter/requirments.txt b/Word_frequency_counter/requirments.txt new file mode 100644 index 0000000..742e6f7 --- /dev/null +++ b/Word_frequency_counter/requirments.txt @@ -0,0 +1 @@ +regex==2024.9.11 From 5cca655ddca44f7fdf91008daeefc035ee0a4308 Mon Sep 17 00:00:00 2001 From: rkt-1597 Date: Thu, 24 Oct 2024 20:39:09 +0530 Subject: [PATCH 5/6] Changing requirments.txt file --- Word_frequency_counter/requirments.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Word_frequency_counter/requirments.txt diff --git a/Word_frequency_counter/requirments.txt b/Word_frequency_counter/requirments.txt deleted file mode 100644 index 742e6f7..0000000 --- a/Word_frequency_counter/requirments.txt +++ /dev/null @@ -1 +0,0 @@ -regex==2024.9.11 From a0d55d6c82bafceded1f23687b09679d4d69184a Mon Sep 17 00:00:00 2001 From: rkt-1597 Date: Thu, 24 Oct 2024 20:39:41 +0530 Subject: [PATCH 6/6] Added requirments.txt file --- Word_frequency_counter/requirments.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 Word_frequency_counter/requirments.txt diff --git a/Word_frequency_counter/requirments.txt b/Word_frequency_counter/requirments.txt new file mode 100644 index 0000000..742e6f7 --- /dev/null +++ b/Word_frequency_counter/requirments.txt @@ -0,0 +1 @@ +regex==2024.9.11