From d1200cbed435e6dbd32d38f23a598b250507abda Mon Sep 17 00:00:00 2001
From: sushanth shetty <11s.shettyy@gmail.com>
Date: Thu, 5 Dec 2024 21:04:36 +0545
Subject: [PATCH 1/2] Rewrite Huffman coding with a heapq-based tree

---
 compression/huffman.py | 147 +++++++++++++++++++++--------------------
 1 file changed, 77 insertions(+), 70 deletions(-)

diff --git a/compression/huffman.py b/compression/huffman.py
index 44eda6c03180..d591d4be4616 100644
--- a/compression/huffman.py
+++ b/compression/huffman.py
@@ -1,92 +1,99 @@
-from __future__ import annotations
-
+import heapq
+from collections import defaultdict
 import sys
 
+class HuffmanNode:
+    def __init__(self, char=None, freq=0):
+        self.char = char
+        self.freq = freq
+        self.left = None
+        self.right = None
 
-class Letter:
-    def __init__(self, letter: str, freq: int):
-        self.letter: str = letter
-        self.freq: int = freq
-        self.bitstring: dict[str, str] = {}
-
-    def __repr__(self) -> str:
-        return f"{self.letter}:{self.freq}"
+    def __lt__(self, other):
+        return self.freq < other.freq
 
 
-class TreeNode:
-    def __init__(self, freq: int, left: Letter | TreeNode, right: Letter | TreeNode):
-        self.freq: int = freq
-        self.left: Letter | TreeNode = left
-        self.right: Letter | TreeNode = right
+def calculate_frequencies(file_path):
+    """
+    Reads the file and calculates the frequency of each character.
+    """
+    freq = defaultdict(int)
+    with open(file_path, 'r') as file:
+        for line in file:
+            for char in line:
+                freq[char] += 1
+    return freq
 
 
-def parse_file(file_path: str) -> list[Letter]:
+def build_huffman_tree(freq_dict):
     """
-    Read the file and build a dict of all letters and their
-    frequencies, then convert the dict into a list of Letters.
+    Builds the Huffman tree using a priority queue.
     """
-    chars: dict[str, int] = {}
-    with open(file_path) as f:
-        while True:
-            c = f.read(1)
-            if not c:
-                break
-            chars[c] = chars[c] + 1 if c in chars else 1
-    return sorted((Letter(c, f) for c, f in chars.items()), key=lambda x: x.freq)
+    priority_queue = [HuffmanNode(char, freq) for char, freq in freq_dict.items()]
+    heapq.heapify(priority_queue)
+
+    while len(priority_queue) > 1:
+        left = heapq.heappop(priority_queue)
+        right = heapq.heappop(priority_queue)
+
+        merged = HuffmanNode(freq=left.freq + right.freq)
+        merged.left = left
+        merged.right = right
+
+        heapq.heappush(priority_queue, merged)
+
+    return priority_queue[0]
 
 
-def build_tree(letters: list[Letter]) -> Letter | TreeNode:
+def generate_codes(node, current_code="", code_map=None):
     """
-    Run through the list of Letters and build the min heap
-    for the Huffman Tree.
+    Generates the Huffman codes by traversing the tree recursively.
     """
-    response: list[Letter | TreeNode] = list(letters)
-    while len(response) > 1:
-        left = response.pop(0)
-        right = response.pop(0)
-        total_freq = left.freq + right.freq
-        node = TreeNode(total_freq, left, right)
-        response.append(node)
-        response.sort(key=lambda x: x.freq)
-    return response[0]
-
-
-def traverse_tree(root: Letter | TreeNode, bitstring: str) -> list[Letter]:
+    if code_map is None:
+        code_map = {}
+
+    if node is not None:
+        if node.char is not None:
+            code_map[node.char] = current_code
+
+        generate_codes(node.left, current_code + "0", code_map)
+        generate_codes(node.right, current_code + "1", code_map)
+
+    return code_map
+
+
+def encode_file(file_path, code_map):
     """
-    Recursively traverse the Huffman Tree to set each
-    Letter's bitstring dictionary, and return the list of Letters
+    Encodes the file contents using the Huffman codes.
     """
-    if isinstance(root, Letter):
-        root.bitstring[root.letter] = bitstring
-        return [root]
-    treenode: TreeNode = root
-    letters = []
-    letters += traverse_tree(treenode.left, bitstring + "0")
-    letters += traverse_tree(treenode.right, bitstring + "1")
-    return letters
+    encoded_output = []
+    with open(file_path, 'r') as file:
+        for line in file:
+            for char in line:
+                encoded_output.append(code_map[char])
+
+    return ''.join(encoded_output)
 
 
-def huffman(file_path: str) -> None:
+def huffman(file_path):
     """
-    Parse the file, build the tree, then run through the file
-    again, using the letters dictionary to find and print out the
-    bitstring for each letter.
+    Main function to perform Huffman encoding on a given file.
     """
-    letters_list = parse_file(file_path)
-    root = build_tree(letters_list)
-    letters = {
-        k: v for letter in traverse_tree(root, "") for k, v in letter.bitstring.items()
-    }
-    print(f"Huffman Coding  of {file_path}: ")
-    with open(file_path) as f:
-        while True:
-            c = f.read(1)
-            if not c:
-                break
-            print(letters[c], end=" ")
-    print()
+    freq_dict = calculate_frequencies(file_path)
+    huffman_tree_root = build_huffman_tree(freq_dict)
+    code_map = generate_codes(huffman_tree_root)
+
+    print(f"Huffman Codes for characters in {file_path}:")
+    for char, code in code_map.items():
+        print(f"'{char}': {code}")
+
+    encoded_data = encode_file(file_path, code_map)
+    print("\nEncoded Data:")
+    print(encoded_data)
 
 
 if __name__ == "__main__":
-    # pass the file path to the huffman function
-    huffman(sys.argv[1])
+    if len(sys.argv) < 2:
+        print("Usage: python huffman.py <file_path>")
+    else:
+        huffman(sys.argv[1])

From 25bf1925461b2c07288a00070c5ae97b6294e562 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 5 Dec 2024 15:25:35 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 compression/huffman.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/compression/huffman.py b/compression/huffman.py
index d591d4be4616..95b69170e33b 100644
--- a/compression/huffman.py
+++ b/compression/huffman.py
@@ -2,6 +2,7 @@
 from collections import defaultdict
 import sys
 
+
 class HuffmanNode:
     def __init__(self, char=None, freq=0):
         self.char = char
@@ -18,7 +19,7 @@ def calculate_frequencies(file_path):
     Reads the file and calculates the frequency of each character.
     """
     freq = defaultdict(int)
-    with open(file_path, 'r') as file:
+    with open(file_path, "r") as file:
         for line in file:
             for char in line:
                 freq[char] += 1
@@ -67,12 +68,12 @@ def encode_file(file_path, code_map):
     Encodes the file contents using the Huffman codes.
     """
     encoded_output = []
-    with open(file_path, 'r') as file:
+    with open(file_path, "r") as file:
         for line in file:
             for char in line:
                 encoded_output.append(code_map[char])
 
-    return ''.join(encoded_output)
+    return "".join(encoded_output)
 
 
 def huffman(file_path):