Skip to content

Commit cb20062

Browse files
committed
fix: Restore the missing simple frequency table method
1 parent 3b95bc3 commit cb20062

File tree

2 files changed

+118
-45
lines changed

2 files changed

+118
-45
lines changed

FrequencyTable.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,75 @@ def PopulateGrouped(self):
152152
frequency, data_range, data_limit, data_midpoint,
153153
bot_cumulative_frequency, top_cumulative_frequency,
154154
relative_frequency, mode)
155+
156+
# Populate Simple Table Frequency Data Method
def PopulateSimple(self):
    """Build an ungrouped (simple) frequency table from self.dataset.

    Stores the result in self.simple as a ProcessedData object holding,
    per distinct class value: frequency, class limits (numeric data
    only), bottom/top cumulative frequencies, percentage relative
    frequency, and the mode(s).
    """
    # Distinct class values, sorted ascending.
    data = sorted(set(self.dataset))
    frequency = []                  # Count of each class
    top_cumulative_frequency = []   # Count of items above each class
    bot_cumulative_frequency = []   # Count of items up to each class
    relative_frequency = []         # Percentage share of each class
    mode = []                       # Class value(s) with the highest count

    # Class limits only exist for numeric data; they stay None for
    # purely string-based datasets.
    top_limit = None
    bottom_limit = None
    if not all(isinstance(item, str) for item in self.dataset):
        top_limit = []
        bottom_limit = []

    # Single loop handles both numeric and string data.
    for current_class in data:
        # Absolute and percentage relative frequency of this class.
        current_frequency = self.dataset.count(current_class)
        frequency.append(current_frequency)
        relative_frequency.append(
            np.round((current_frequency / self.length) * 100))

        if top_limit is not None and bottom_limit is not None:
            # Numeric data: half-unit class limits around the value.
            top_limit.append(current_class + 0.5)
            bottom_limit.append(current_class - 0.5)

            # Cumulative counts below / above this class.
            # NOTE(review): relies on the existing find_frequency
            # helper; assumed to count values within the given bounds.
            bot_cumulative_frequency.append(
                self.find_frequency(self.lowest - 1, current_class))
            top_cumulative_frequency.append(
                self.find_frequency(current_class + 1, self.highest + 1))
        else:
            # String data: accumulate counts in sort order.
            # BUG FIX: the previous version stored only the class's own
            # count as its "cumulative" frequency. Use the running total
            # instead (frequency already includes the current class at
            # this point in the loop).
            current_bot_cumulative_frequency = sum(frequency)
            bot_cumulative_frequency.append(current_bot_cumulative_frequency)
            # Everything not yet accumulated sorts after this class.
            top_cumulative_frequency.append(
                self.length - current_bot_cumulative_frequency)

    # Mode: every class that reaches the maximum frequency.
    # Guarded so an empty dataset yields an empty mode list instead of
    # raising ValueError from max().
    if frequency:
        peak = max(frequency)
        mode = [cls for cls, cnt in zip(data, frequency) if cnt == peak]

    # Pack the results; the positional layout must match the
    # ProcessedData constructor as used by PopulateGrouped.
    self.simple = ProcessedData(
        data, None, None, bottom_limit, top_limit,
        frequency, None, None, None,
        bot_cumulative_frequency, top_cumulative_frequency,
        relative_frequency, mode
    )
223+
155224

156225
# Processed Data Assignment
157226
class ProcessedData:

Main.py

Lines changed: 49 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,67 +4,71 @@
44
import tabulate as tabulate
55

66
# Raw data: a categorical sample of fruit names.
fruit_sample = [
    'Mango', 'Pineapple', 'Banana', 'Banana', 'Pineapple', 'Banana',
    'Banana', 'Grapes', 'Pear', 'Pineapple', 'Orange', 'Strawberry',
    'Orange', 'Mango', 'Banana', 'Pineapple', 'Orange', 'Banana',
    'Strawberry', 'Pear', 'Apple', 'Banana', 'Pineapple', 'Orange',
    'Mango', 'Apple', 'Pear', 'Pear', 'Pear', 'Grapes', 'Pear',
    'Orange', 'Grapes', 'Strawberry', 'Mango', 'Orange', 'Orange',
    'Mango', 'Pear', 'Strawberry', 'Pear', 'Orange', 'Mango',
    'Mango', 'Pear', 'Grapes', 'Apple', 'Mango', 'Pineapple',
    'Strawberry', 'Strawberry', 'Grapes', 'Apple', 'Banana',
    'Grapes', 'Banana', 'Strawberry', 'Mango', 'Strawberry',
    'Orange', 'Pear', 'Grapes', 'Orange', 'Apple'
]

# Wrap the raw data in a FrequencyTable object.
freq_table = ft.FrequencyTable(fruit_sample)

# Populate the frequency statistics (toggle grouped vs. simple here).
# freq_table.PopulateGrouped() # Grouped Data
freq_table.PopulateSimple() # Simple Data

# Build a pandas DataFrame from the populated statistics.
# Grouped populated data (disabled while simple mode is active):
# grouped_df = pd.DataFrame(
#     {
#         "Class Interval" : freq_table.grouped.ranges,
#         "Class Limit" : freq_table.grouped.limit,
#         "Frequency" : freq_table.grouped.frequency,
#         "Midpoint" : freq_table.grouped.midpoint,
#         "C <" : freq_table.grouped.bottom_limit,
#         "CF <" : freq_table.grouped.bottom_cumulative_frequency,
#         "C >" : freq_table.grouped.top_limit,
#         "CF >" : freq_table.grouped.top_cumulative_frequency,
#         "Relative Frequency" : freq_table.grouped.percentage_relative_frequency
#     }
# )

# Simple populated data.
simple_df = pd.DataFrame(
    {
        "Class" : freq_table.simple.classval,
        "Frequency" : freq_table.simple.frequency,
        "Relative Frequency" : freq_table.simple.percentage_relative_frequency
    }
)

# Render the DataFrame as a pipe-style text table.
simple_table = tabulate.tabulate(
    simple_df,
    headers='keys',
    tablefmt='pipe'
)

# grouped_table = tabulate.tabulate(
#     grouped_df,
#     headers='keys',
#     tablefmt='pipe',
# )

# Print the processed data.
print(simple_table)
# print(grouped_table)
7074

0 commit comments

Comments
 (0)