From c618774129a0148eac8b08fb947dac922d17208f Mon Sep 17 00:00:00 2001 From: rkt-1597 Date: Thu, 24 Oct 2024 20:43:00 +0530 Subject: [PATCH 1/3] Added requirments.txt file --- Word_frequency_counter/README.md | 29 ++++++++++++++++++++++++++ Word_frequency_counter/main.py | 28 +++++++++++++++++++++++++ Word_frequency_counter/requirments.txt | 1 + Word_frequency_counter/runtime.txt | 1 + 4 files changed, 59 insertions(+) create mode 100644 Word_frequency_counter/README.md create mode 100644 Word_frequency_counter/main.py create mode 100644 Word_frequency_counter/requirments.txt create mode 100644 Word_frequency_counter/runtime.txt diff --git a/Word_frequency_counter/README.md b/Word_frequency_counter/README.md new file mode 100644 index 0000000..679d125 --- /dev/null +++ b/Word_frequency_counter/README.md @@ -0,0 +1,29 @@ +# Word Frequency Counter + +A simple python script that counts the number of words in a given text document and prints the top 10 words according to their frequency, along with their frequency of occurence. + +--- + +- **Input :** Path of the text file to be processed +- **Output :** List of top 10 words according to their frequency, along with their frequency of occurence. + +--- + +## Features : + +- User friendly interface +- Output is in tabular format +- Case insensitive processing of words +- Get the Top 10 words in the text file which occur most frequntly, along with their counts + +--- + +## Usage : + +1. Clone the repository +2. Navigate to the project folder +3. Run the command : + +```python +python3 main.py +``` \ No newline at end of file diff --git a/Word_frequency_counter/main.py b/Word_frequency_counter/main.py new file mode 100644 index 0000000..c0b39ad --- /dev/null +++ b/Word_frequency_counter/main.py @@ -0,0 +1,28 @@ +import regex as re +from collections import Counter + +def find_words_frequency(file_path): + ''' + This script takes the path of the text file to be processed as input + and prints the top ten words and also prints their counts in the given text file. + ''' + with open(file_path, 'r', encoding='utf-8') as file: + text = file.read().lower() + + # Use `regex`'s findall function + all_words = re.findall(r'\b\p{L}+\b', text) + word_frequency = Counter(all_words) + most_common_words = word_frequency.most_common(10) + + # Print in tabular format + print(f"{'Word':<15} {'Count':<5}") + print("-" * 20) + for word, count in most_common_words: + print(f"{word:<15} {count:<5}") + +def main(): + file_path = input("Enter the path of file : ") + find_words_frequency(file_path) + +if __name__ == "__main__": + main() diff --git a/Word_frequency_counter/requirments.txt b/Word_frequency_counter/requirments.txt new file mode 100644 index 0000000..742e6f7 --- /dev/null +++ b/Word_frequency_counter/requirments.txt @@ -0,0 +1 @@ +regex=2.5.147 diff --git a/Word_frequency_counter/runtime.txt b/Word_frequency_counter/runtime.txt new file mode 100644 index 0000000..f023023 --- /dev/null +++ b/Word_frequency_counter/runtime.txt @@ -0,0 +1 @@ +python-3.10.7 \ No newline at end of file From e4199e3440d20cfef54bf10bf06d0e37b242ad97 Mon Sep 17 00:00:00 2001 From: rkt-1597 Date: Thu, 24 Oct 2024 20:44:15 +0530 Subject: [PATCH 2/3] Removed reuirments.txt file --- Word_frequency_counter/README.md | 29 -------------------------- Word_frequency_counter/main.py | 28 ------------------------- Word_frequency_counter/requirments.txt | 1 - Word_frequency_counter/runtime.txt | 1 - 4 files changed, 59 deletions(-) delete mode 100644 Word_frequency_counter/README.md delete mode 100644 Word_frequency_counter/main.py delete mode 100644 Word_frequency_counter/requirments.txt delete mode 100644 Word_frequency_counter/runtime.txt diff --git a/Word_frequency_counter/README.md b/Word_frequency_counter/README.md deleted file mode 100644 index 679d125..0000000 --- a/Word_frequency_counter/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Word Frequency Counter - -A simple python script that counts the number of words in a given text document and prints the top 10 words according to their frequency, along with their frequency of occurence. - ---- - -- **Input :** Path of the text file to be processed -- **Output :** List of top 10 words according to their frequency, along with their frequency of occurence. - ---- - -## Features : - -- User friendly interface -- Output is in tabular format -- Case insensitive processing of words -- Get the Top 10 words in the text file which occur most frequntly, along with their counts - ---- - -## Usage : - -1. Clone the repository -2. Navigate to the project folder -3. Run the command : - -```python -python3 main.py -``` \ No newline at end of file diff --git a/Word_frequency_counter/main.py b/Word_frequency_counter/main.py deleted file mode 100644 index c0b39ad..0000000 --- a/Word_frequency_counter/main.py +++ /dev/null @@ -1,28 +0,0 @@ -import regex as re -from collections import Counter - -def find_words_frequency(file_path): - ''' - This script takes the path of the text file to be processed as input - and prints the top ten words and also prints their counts in the given text file. - ''' - with open(file_path, 'r', encoding='utf-8') as file: - text = file.read().lower() - - # Use `regex`'s findall function - all_words = re.findall(r'\b\p{L}+\b', text) - word_frequency = Counter(all_words) - most_common_words = word_frequency.most_common(10) - - # Print in tabular format - print(f"{'Word':<15} {'Count':<5}") - print("-" * 20) - for word, count in most_common_words: - print(f"{word:<15} {count:<5}") - -def main(): - file_path = input("Enter the path of file : ") - find_words_frequency(file_path) - -if __name__ == "__main__": - main() diff --git a/Word_frequency_counter/requirments.txt b/Word_frequency_counter/requirments.txt deleted file mode 100644 index 742e6f7..0000000 --- a/Word_frequency_counter/requirments.txt +++ /dev/null @@ -1 +0,0 @@ -regex=2.5.147 diff --git a/Word_frequency_counter/runtime.txt b/Word_frequency_counter/runtime.txt deleted file mode 100644 index f023023..0000000 --- a/Word_frequency_counter/runtime.txt +++ /dev/null @@ -1 +0,0 @@ -python-3.10.7 \ No newline at end of file From 8d8d056fdae4c2646ac11d092d98f00aaf1d10e7 Mon Sep 17 00:00:00 2001 From: rkt-1597 Date: Thu, 24 Oct 2024 20:45:01 +0530 Subject: [PATCH 3/3] Added requirments.txt file --- Word_frequency_counter/README.md | 29 ++++++++++++++++++++++++++ Word_frequency_counter/main.py | 28 +++++++++++++++++++++++++ Word_frequency_counter/requirments.txt | 1 + Word_frequency_counter/runtime.txt | 1 + 4 files changed, 59 insertions(+) create mode 100644 Word_frequency_counter/README.md create mode 100644 Word_frequency_counter/main.py create mode 100644 Word_frequency_counter/requirments.txt create mode 100644 Word_frequency_counter/runtime.txt diff --git a/Word_frequency_counter/README.md b/Word_frequency_counter/README.md new file mode 100644 index 0000000..679d125 --- /dev/null +++ b/Word_frequency_counter/README.md @@ -0,0 +1,29 @@ +# Word Frequency Counter + +A simple python script that counts the number of words in a given text document and prints the top 10 words according to their frequency, along with their frequency of occurence. + +--- + +- **Input :** Path of the text file to be processed +- **Output :** List of top 10 words according to their frequency, along with their frequency of occurence. + +--- + +## Features : + +- User friendly interface +- Output is in tabular format +- Case insensitive processing of words +- Get the Top 10 words in the text file which occur most frequntly, along with their counts + +--- + +## Usage : + +1. Clone the repository +2. Navigate to the project folder +3. Run the command : + +```python +python3 main.py +``` \ No newline at end of file diff --git a/Word_frequency_counter/main.py b/Word_frequency_counter/main.py new file mode 100644 index 0000000..c0b39ad --- /dev/null +++ b/Word_frequency_counter/main.py @@ -0,0 +1,28 @@ +import regex as re +from collections import Counter + +def find_words_frequency(file_path): + ''' + This script takes the path of the text file to be processed as input + and prints the top ten words and also prints their counts in the given text file. + ''' + with open(file_path, 'r', encoding='utf-8') as file: + text = file.read().lower() + + # Use `regex`'s findall function + all_words = re.findall(r'\b\p{L}+\b', text) + word_frequency = Counter(all_words) + most_common_words = word_frequency.most_common(10) + + # Print in tabular format + print(f"{'Word':<15} {'Count':<5}") + print("-" * 20) + for word, count in most_common_words: + print(f"{word:<15} {count:<5}") + +def main(): + file_path = input("Enter the path of file : ") + find_words_frequency(file_path) + +if __name__ == "__main__": + main() diff --git a/Word_frequency_counter/requirments.txt b/Word_frequency_counter/requirments.txt new file mode 100644 index 0000000..742e6f7 --- /dev/null +++ b/Word_frequency_counter/requirments.txt @@ -0,0 +1 @@ +regex=2.5.147 diff --git a/Word_frequency_counter/runtime.txt b/Word_frequency_counter/runtime.txt new file mode 100644 index 0000000..f023023 --- /dev/null +++ b/Word_frequency_counter/runtime.txt @@ -0,0 +1 @@ +python-3.10.7 \ No newline at end of file