feat: Adding Float/Decimal Data Handler

brotherzhafif · brotherzhafif · commit e2829353ff09 · 2024-10-13T11:13:07.000+07:00
diff --git a/FrequencyTable.py b/FrequencyTable.py
@@ -1,5 +1,4 @@
 import numpy as np
-from scipy import stats
 
 # Frequency Table Class 
 class FrequencyTable:
@@ -25,49 +24,38 @@ def __init__(self, dataset):
 
             # Interval is Rounding Up
             self.interval = self.range / self.classes 
-            self.interval = round(self.interval + 0.5)
+            self.interval = round(self.interval + 0.5, 2)  # Adds 0.5, then rounds to two decimal places (no longer rounds up to an integer)
 
-            # Rounding Both Limits So The Data Would Be Simple And Easier To Read
-            self.base = self.roundy(self.lowest - 3)
-            self.top = self.roundy(self.highest + 3)
+            # Rounding Both Limits
+            self.base = self.roundy(self.lowest - 0.5)
+            self.top = self.roundy(self.highest + 0.5)
 
             # Mean or Average
             self.mean = (self.sum / self.length)
 
-            # Formula for Variance
+            # Variance and Standard Deviation
             self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length
-
-            # Formula for Standard Deviation
             self.deviation = (self.variance ** 0.5)
 
-            # Formula to find Dataset Skewness
+            # Skewness
             self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \
                             sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
 
-            # Formula to find Dataset Kurtosis
+            # Kurtosis
             self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / 
                             ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
                             (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
 
-    # Base 5 Rounding
-    def roundy(self, x, base=5):
+    # Round x to the nearest multiple of base (default 0.5)
+    def roundy(self, x, base=0.5):
         return base * round(x / base)
-    
-    # Function To Find Frequency in Dataset with Desired Range (Top and Down Limit)
+
+    # Count dataset values x with bot < x <= top (lower bound exclusive, upper bound inclusive)
     def find_frequency(self, bot, top):
-        try:
-            bot = int(bot)
-            top = int(top)
-        except (ValueError, TypeError) as e:
-            print(f"Error converting to int: {e}")
-    
-        total_frequency = 0
-        for i in range(bot, top):
-            frequency = self.dataset.count(i)
-            total_frequency = total_frequency + frequency
+        total_frequency = sum(1 for x in self.dataset if bot < x <= top)
         return total_frequency
-    
-    # Populate Grouped Table Frequency Data Method
+
+    # Populate Grouped Frequency Table Data Method
     def PopulateGrouped(self):
         # Initiating Used List
         top = []
@@ -85,129 +73,108 @@ def PopulateGrouped(self):
         relative_frequency = []
         mode = []
 
-        # Initiating Used Parameter for Frequency Table
+        # Frequency Table Initialization
         interval = self.interval
-        current_number = self.base - 1
+        current_number = self.base - 0.5
         old_number = 0
 
         # Processing the Frequency Table Data
-        while current_number <= self.top-3:
+        while current_number <= self.top:
             # Finding Class Lowest Value
-            old_number = current_number + 1
-            bottom.append(old_number) # 155
+            old_number = current_number + 0.5
+            bottom.append(old_number) 
             
             # Finding Class Highest Value 
             current_number = current_number + interval
             top.append(current_number)
             
-            # Append Class Bottom Limit
+            # Class Limits
             current_bottom_limit = old_number - 0.5
             bottom_limit.append(current_bottom_limit)
-
-            # Append Class Top Limit
             current_top_limit = current_number + 0.5
             top_limit.append(current_top_limit)
 
-            # Finding The Frequency That Range
-            current_frequency = self.find_frequency(old_number, current_number + 1)
+            # Frequency Calculation
+            current_frequency = self.find_frequency(old_number, current_number)
             frequency.append(current_frequency)
 
-            # Adding The Number Range From Both Frequency
-            current_data_range = f"{old_number} ~ {current_number}"
+            # Data Range and Limits
+            current_data_range = f"{old_number:.2f} ~ {current_number:.2f}"
             data_range.append(current_data_range)
-
-            # Adding Data Range Limit Of The Class Frequency
-            current_data_limit = f"{current_bottom_limit} ~ {current_top_limit}"
+            current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}"
             data_limit.append(current_data_limit)   
 
-            # Adding Data Midpoint of The Class Frequency
+            # Midpoint Calculation
             current_data_midpoint = (old_number + current_number) / 2
             data_midpoint.append(current_data_midpoint)
 
-            # Adding Bottom Cumulative Frequency of The Class 
-            current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number)
+            # Cumulative Frequencies
+            current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, old_number)
             bot_cumulative_frequency.append(current_bot_cumulative_frequency)
-
-            # Adding Top Cumulative Frequency of The Class 
-            current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1)
+            current_top_cumulative_frequency = self.find_frequency(current_number, self.highest + 0.5)
             top_cumulative_frequency.append(current_top_cumulative_frequency)
-        
-            # Counting the Relative Frequency in Percentage
-            current_relative_frequency = np.round((current_frequency / self.length) * 100)
-            # Adding Percent Symbol into The Relative Frequency Coloumn
+
+            # Relative Frequency Calculation
+            current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
             relative_frequency.append(current_relative_frequency)    
         
-        # Find Mode or Data that appears most frequently 
+        # Find Mode
         mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
         mode = [data_range[i] for i in mode_index]
         
-        # Append Processed Data into Data Attributes
+        # Store Processed Data
         self.grouped = ProcessedData(None, bottom, top, bottom_limit, top_limit, 
                                      frequency, data_range, data_limit, data_midpoint, 
                                      bot_cumulative_frequency, top_cumulative_frequency, 
                                      relative_frequency, mode)
   
-    # Populate Simple Table Frequency Data Method    
+    # Populate Simple Frequency Table Data Method    
     def PopulateSimple(self):
-        # Initialize general variables
-        data = sorted(set(self.dataset))  # Remove duplicates and sort the data
-        frequency = []  # To store the frequency of each class
-        top_cumulative_frequency = []  # To store top cumulative frequency for each class
-        bot_cumulative_frequency = []  # To store bottom cumulative frequency for each class
-        relative_frequency = []  # To store relative frequency for each class
-        mode = []  # To store the mode(s)
-
-        # Variables specifically for numeric data
+        # Initialize variables
+        data = sorted(set(self.dataset))  
+        frequency = []  
+        top_cumulative_frequency = []  
+        bot_cumulative_frequency = []  
+        relative_frequency = []  
+        mode = []  
+
+        # Limits stay None for all-string data; they become lists below when any item is not a string
         top_limit = None
         bottom_limit = None
 
-        # Check if the dataset is not entirely string-based (for numeric data)
         if not all(isinstance(item, str) for item in self.dataset):
-            # Initialize limits for numeric data
             top_limit = []
             bottom_limit = []
 
-        # Single loop to process both numeric and string data
+        # Process each class
         for current_class in data:
-            # Calculate the frequency of the current class
             current_frequency = self.dataset.count(current_class)
             frequency.append(current_frequency)
 
-            # Calculate the relative frequency for the current class
-            current_relative_frequency = np.round((current_frequency / self.length) * 100)
+            current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
             relative_frequency.append(current_relative_frequency)
 
-            # If the data is numeric, calculate limits and cumulative frequencies
             if top_limit is not None and bottom_limit is not None:
-                # Calculate top and bottom limits for numeric data
                 current_top_limit = current_class + 0.5
                 current_bottom_limit = current_class - 0.5
                 top_limit.append(current_top_limit)
                 bottom_limit.append(current_bottom_limit)
 
-                # Calculate bottom cumulative frequency for numeric data
-                current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, current_class)
+                current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, current_class)
                 bot_cumulative_frequency.append(current_bot_cumulative_frequency)
 
-                # Calculate top cumulative frequency for numeric data
-                current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
+                current_top_cumulative_frequency = self.find_frequency(current_class, self.highest + 0.5)
                 top_cumulative_frequency.append(current_top_cumulative_frequency)
 
             else:
-                # If the data is string-based, calculate cumulative frequencies
-                # Calculate bottom cumulative frequency for strings
                 current_bot_cumulative_frequency = self.dataset.count(current_class)
                 bot_cumulative_frequency.append(current_bot_cumulative_frequency)
-
-                # Calculate top cumulative frequency for strings
                 current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
                 top_cumulative_frequency.append(current_top_cumulative_frequency)
 
-        # Find the mode (the class with the highest frequency)
         mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
         mode = [data[i] for i in mode_index]
 
-        # Create the ProcessedData object based on the data type
         self.simple = ProcessedData(
             data, None, None, bottom_limit, top_limit, 
             frequency, None, None, None, 
@@ -217,7 +184,7 @@ def PopulateSimple(self):
         
 # Processed Data Assignment 
 class ProcessedData:
-    # Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R) 
+    # Constructor. Legend: L = limit, F = frequency, R = ranges, M = midpoint, CF = cumulative frequency, RF = relative frequency
     def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
         self.classval = data
         self.bottom = bot
@@ -227,13 +194,10 @@ def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF,
         self.midpoint = M
         self.ranges = R      
         self.limit = L     
-
         self.frequency = F
         self.bottom_cumulative_frequency = bot_CF
         self.top_cumulative_frequency = top_CF
         self.relative_frequency = RF
         
         self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ]
-        self.mode = mode
-        
- 
+        self.mode = mode
diff --git a/Main.py b/Main.py
@@ -4,19 +4,7 @@
 import tabulate as tabulate
  
 # Raw Data
-dataset = [
-    'Mango', 'Pineapple', 'Banana', 'Banana', 'Pineapple', 'Banana', 
-    'Banana', 'Grapes', 'Pear', 'Pineapple', 'Orange', 'Strawberry', 
-    'Orange', 'Mango', 'Banana', 'Pineapple', 'Orange', 'Banana', 
-    'Strawberry', 'Pear', 'Apple', 'Banana', 'Pineapple', 'Orange', 
-    'Mango', 'Apple', 'Pear', 'Pear', 'Pear', 'Grapes', 'Pear', 
-    'Orange', 'Grapes', 'Strawberry', 'Mango', 'Orange', 'Orange', 
-    'Mango', 'Pear', 'Strawberry', 'Pear', 'Orange', 'Mango', 
-    'Mango', 'Pear', 'Grapes', 'Apple', 'Mango', 'Pineapple', 
-    'Strawberry', 'Strawberry', 'Grapes', 'Apple', 'Banana', 
-    'Grapes', 'Banana', 'Strawberry', 'Mango', 'Strawberry', 
-    'Orange', 'Pear', 'Grapes', 'Orange', 'Apple'
-]
+dataset = [1.2, 2.5, 3.1, 4.7, 1.2, 2.5, 3.8, 4.5, 2.1, 3.3, 4.8, 5.0]
 
 
 # Initiate Object From The Raw Data