Skip to content

Commit e282935

Browse files
committed
feat: Adding Float/Decimal Data Handler
1 parent c68605d commit e282935

File tree

2 files changed

+51
-99
lines changed

2 files changed

+51
-99
lines changed

FrequencyTable.py

Lines changed: 50 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import numpy as np
2-
from scipy import stats
32

43
# Frequency Table Class
54
class FrequencyTable:
@@ -25,49 +24,38 @@ def __init__(self, dataset):
2524

2625
# Interval is Rounding Up
2726
self.interval = self.range / self.classes
28-
self.interval = round(self.interval + 0.5)
27+
self.interval = round(self.interval + 0.5, 2) # Keep two decimal places
2928

30-
# Rounding Both Limits So The Data Would Be Simple And Easier To Read
31-
self.base = self.roundy(self.lowest - 3)
32-
self.top = self.roundy(self.highest + 3)
29+
# Rounding Both Limits
30+
self.base = self.roundy(self.lowest - 0.5)
31+
self.top = self.roundy(self.highest + 0.5)
3332

3433
# Mean or Average
3534
self.mean = (self.sum / self.length)
3635

37-
# Formula for Variance
36+
# Variance and Standard Deviation
3837
self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length
39-
40-
# Formula for Standard Deviation
4138
self.deviation = (self.variance ** 0.5)
4239

43-
# Formula to find Dataset Skewness
40+
# Skewness
4441
self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \
4542
sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
4643

47-
# Formula to find Dataset Kurtosis
44+
# Kurtosis
4845
self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) /
4946
((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
5047
(3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
5148

52-
# Base 5 Rounding
53-
def roundy(self, x, base=5):
49+
# Base Rounding
50+
def roundy(self, x, base=0.5):
5451
return base * round(x / base)
55-
56-
# Function To Find Frequency in Dataset with Desired Range (Top and Down Limit)
52+
53+
# Function To Find Frequency in Dataset with Desired Range
5754
def find_frequency(self, bot, top):
58-
try:
59-
bot = int(bot)
60-
top = int(top)
61-
except (ValueError, TypeError) as e:
62-
print(f"Error converting to int: {e}")
63-
64-
total_frequency = 0
65-
for i in range(bot, top):
66-
frequency = self.dataset.count(i)
67-
total_frequency = total_frequency + frequency
55+
total_frequency = sum(1 for x in self.dataset if bot < x <= top)
6856
return total_frequency
69-
70-
# Populate Grouped Table Frequency Data Method
57+
58+
# Populate Grouped Frequency Table Data Method
7159
def PopulateGrouped(self):
7260
# Initiating Used List
7361
top = []
@@ -85,129 +73,108 @@ def PopulateGrouped(self):
8573
relative_frequency = []
8674
mode = []
8775

88-
# Initiating Used Parameter for Frequency Table
76+
# Frequency Table Initialization
8977
interval = self.interval
90-
current_number = self.base - 1
78+
current_number = self.base - 0.5
9179
old_number = 0
9280

9381
# Processing the Frequency Table Data
94-
while current_number <= self.top-3:
82+
while current_number <= self.top:
9583
# Finding Class Lowest Value
96-
old_number = current_number + 1
97-
bottom.append(old_number) # 155
84+
old_number = current_number + 0.5
85+
bottom.append(old_number)
9886

9987
# Finding Class Highest Value
10088
current_number = current_number + interval
10189
top.append(current_number)
10290

103-
# Append Class Bottom Limit
91+
# Class Limits
10492
current_bottom_limit = old_number - 0.5
10593
bottom_limit.append(current_bottom_limit)
106-
107-
# Append Class Top Limit
10894
current_top_limit = current_number + 0.5
10995
top_limit.append(current_top_limit)
11096

111-
# Finding The Frequency That Range
112-
current_frequency = self.find_frequency(old_number, current_number + 1)
97+
# Frequency Calculation
98+
current_frequency = self.find_frequency(old_number, current_number)
11399
frequency.append(current_frequency)
114100

115-
# Adding The Number Range From Both Frequency
116-
current_data_range = f"{old_number} ~ {current_number}"
101+
# Data Range and Limits
102+
current_data_range = f"{old_number:.2f} ~ {current_number:.2f}"
117103
data_range.append(current_data_range)
118-
119-
# Adding Data Range Limit Of The Class Frequency
120-
current_data_limit = f"{current_bottom_limit} ~ {current_top_limit}"
104+
current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}"
121105
data_limit.append(current_data_limit)
122106

123-
# Adding Data Midpoint of The Class Frequency
107+
# Midpoint Calculation
124108
current_data_midpoint = (old_number + current_number) / 2
125109
data_midpoint.append(current_data_midpoint)
126110

127-
# Adding Bottom Cumulative Frequency of The Class
128-
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number)
111+
# Cumulative Frequencies
112+
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, old_number)
129113
bot_cumulative_frequency.append(current_bot_cumulative_frequency)
130-
131-
# Adding Top Cumulative Frequency of The Class
132-
current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1)
114+
current_top_cumulative_frequency = self.find_frequency(current_number, self.highest + 0.5)
133115
top_cumulative_frequency.append(current_top_cumulative_frequency)
134-
135-
# Counting the Relative Frequency in Percentage
136-
current_relative_frequency = np.round((current_frequency / self.length) * 100)
137-
# Adding Percent Symbol into The Relative Frequency Coloumn
116+
117+
# Relative Frequency Calculation
118+
current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
138119
relative_frequency.append(current_relative_frequency)
139120

140-
# Find Mode or Data that appears most frequently
121+
# Find Mode
141122
mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
142123
mode = [data_range[i] for i in mode_index]
143124

144-
# Append Processed Data into Data Attributes
125+
# Store Processed Data
145126
self.grouped = ProcessedData(None, bottom, top, bottom_limit, top_limit,
146127
frequency, data_range, data_limit, data_midpoint,
147128
bot_cumulative_frequency, top_cumulative_frequency,
148129
relative_frequency, mode)
149130

150-
# Populate Simple Table Frequency Data Method
131+
# Populate Simple Frequency Table Data Method
151132
def PopulateSimple(self):
152-
# Initialize general variables
153-
data = sorted(set(self.dataset)) # Remove duplicates and sort the data
154-
frequency = [] # To store the frequency of each class
155-
top_cumulative_frequency = [] # To store top cumulative frequency for each class
156-
bot_cumulative_frequency = [] # To store bottom cumulative frequency for each class
157-
relative_frequency = [] # To store relative frequency for each class
158-
mode = [] # To store the mode(s)
159-
160-
# Variables specifically for numeric data
133+
# Initialize variables
134+
data = sorted(set(self.dataset))
135+
frequency = []
136+
top_cumulative_frequency = []
137+
bot_cumulative_frequency = []
138+
relative_frequency = []
139+
mode = []
140+
141+
# Check for numeric data
161142
top_limit = None
162143
bottom_limit = None
163144

164-
# Check if the dataset is not entirely string-based (for numeric data)
165145
if not all(isinstance(item, str) for item in self.dataset):
166-
# Initialize limits for numeric data
167146
top_limit = []
168147
bottom_limit = []
169148

170-
# Single loop to process both numeric and string data
149+
# Process each class
171150
for current_class in data:
172-
# Calculate the frequency of the current class
173151
current_frequency = self.dataset.count(current_class)
174152
frequency.append(current_frequency)
175153

176-
# Calculate the relative frequency for the current class
177-
current_relative_frequency = np.round((current_frequency / self.length) * 100)
154+
current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
178155
relative_frequency.append(current_relative_frequency)
179156

180-
# If the data is numeric, calculate limits and cumulative frequencies
181157
if top_limit is not None and bottom_limit is not None:
182-
# Calculate top and bottom limits for numeric data
183158
current_top_limit = current_class + 0.5
184159
current_bottom_limit = current_class - 0.5
185160
top_limit.append(current_top_limit)
186161
bottom_limit.append(current_bottom_limit)
187162

188-
# Calculate bottom cumulative frequency for numeric data
189-
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, current_class)
163+
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, current_class)
190164
bot_cumulative_frequency.append(current_bot_cumulative_frequency)
191165

192-
# Calculate top cumulative frequency for numeric data
193-
current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
166+
current_top_cumulative_frequency = self.find_frequency(current_class, self.highest + 0.5)
194167
top_cumulative_frequency.append(current_top_cumulative_frequency)
195168

196169
else:
197-
# If the data is string-based, calculate cumulative frequencies
198-
# Calculate bottom cumulative frequency for strings
199170
current_bot_cumulative_frequency = self.dataset.count(current_class)
200171
bot_cumulative_frequency.append(current_bot_cumulative_frequency)
201-
202-
# Calculate top cumulative frequency for strings
203172
current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
204173
top_cumulative_frequency.append(current_top_cumulative_frequency)
205174

206-
# Find the mode (the class with the highest frequency)
207175
mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
208176
mode = [data[i] for i in mode_index]
209177

210-
# Create the ProcessedData object based on the data type
211178
self.simple = ProcessedData(
212179
data, None, None, bottom_limit, top_limit,
213180
frequency, None, None, None,
@@ -217,7 +184,7 @@ def PopulateSimple(self):
217184

218185
# Processed Data Assignment
219186
class ProcessedData:
220-
# Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R)
187+
# Constructor for processed data
221188
def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
222189
self.classval = data
223190
self.bottom = bot
@@ -227,13 +194,10 @@ def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF,
227194
self.midpoint = M
228195
self.ranges = R
229196
self.limit = L
230-
231197
self.frequency = F
232198
self.bottom_cumulative_frequency = bot_CF
233199
self.top_cumulative_frequency = top_CF
234200
self.relative_frequency = RF
235201

236202
self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ]
237-
self.mode = mode
238-
239-
203+
self.mode = mode

Main.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,7 @@
44
import tabulate as tabulate
55

66
# Raw Data
7-
dataset = [
8-
'Mango', 'Pineapple', 'Banana', 'Banana', 'Pineapple', 'Banana',
9-
'Banana', 'Grapes', 'Pear', 'Pineapple', 'Orange', 'Strawberry',
10-
'Orange', 'Mango', 'Banana', 'Pineapple', 'Orange', 'Banana',
11-
'Strawberry', 'Pear', 'Apple', 'Banana', 'Pineapple', 'Orange',
12-
'Mango', 'Apple', 'Pear', 'Pear', 'Pear', 'Grapes', 'Pear',
13-
'Orange', 'Grapes', 'Strawberry', 'Mango', 'Orange', 'Orange',
14-
'Mango', 'Pear', 'Strawberry', 'Pear', 'Orange', 'Mango',
15-
'Mango', 'Pear', 'Grapes', 'Apple', 'Mango', 'Pineapple',
16-
'Strawberry', 'Strawberry', 'Grapes', 'Apple', 'Banana',
17-
'Grapes', 'Banana', 'Strawberry', 'Mango', 'Strawberry',
18-
'Orange', 'Pear', 'Grapes', 'Orange', 'Apple'
19-
]
7+
dataset = [1.2, 2.5, 3.1, 4.7, 1.2, 2.5, 3.8, 4.5, 2.1, 3.3, 4.8, 5.0]
208

219

2210
# Initiate Object From The Raw Data

0 commit comments

Comments
 (0)