|
| 1 | +import numpy as np |
| 2 | + |
| 3 | +# Global Variable Used in Frequency Table Data Processing |
| 4 | +top = [] |
| 5 | +bottom = [] |
| 6 | +top_limit = [] |
| 7 | +bottom_limit = [] |
| 8 | +frequency = [] |
| 9 | +data_range = [] |
| 10 | +data_limit = [] |
| 11 | +data_midpoint = [] |
| 12 | +bot_cumulative_frequency = [] |
| 13 | +top_cumulative_frequency = [] |
| 14 | +relative_frequency = [] |
| 15 | +mode = [] |
| 16 | + |
| 17 | +# Frequency Table Class |
| 241 | + |
| 242 | +# Processed Data Assignment |
| 243 | +class ProcessedData: |
| 244 | + # Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R) |
| 245 | + def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode): |
| 246 | + self.classval = data |
| 247 | + self.bottom = bot |
| 248 | + self.top = top |
| 249 | + self.bottom_limit = bot_L |
| 250 | + self.top_limit = top_L |
| 251 | + self.midpoint = M |
| 252 | + self.ranges = R |
| 253 | + self.limit = L |
| 254 | + self.frequency = F |
| 255 | + self.bottom_cumulative_frequency = bot_CF |
| 256 | + self.top_cumulative_frequency = top_CF |
| 257 | + self.relative_frequency = RF |
| 258 | + self.percentage_relative_frequency = [f"{rf * 1:.2f}%" for rf in self.relative_frequency] |
| 259 | + self.mode = mode |
0 commit comments