Skip to content

Commit 11e24a5

Browse files
committed
feat: Uploading as Library to PyPI
1 parent 5a550df commit 11e24a5

File tree

6 files changed

+494
-0
lines changed

6 files changed

+494
-0
lines changed

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2024 BrotherZhafif
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

pythistic/Chart.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# Chart.py
2+
import matplotlib.pyplot as plt
3+
import numpy as np
4+
from matplotlib_venn import venn2, venn3
5+
6+
class Chart:
    """Prepare matplotlib charts that share a common title and axis labels.

    Each chart method builds a new figure and stores it in ``self.figure``;
    call :meth:`show` afterwards to display it.
    """

    def __init__(self, title="", xlabel="", ylabel=""):
        self.title = title    # Figure title (applied only when non-empty)
        self.xlabel = xlabel  # X-axis label (applied only when non-empty)
        self.ylabel = ylabel  # Y-axis label (applied only when non-empty)
        self.figure = None    # Most recently prepared figure, if any

    def _apply_common_properties(self):
        # Apply the optional title and axis labels to the current axes.
        if self.title:
            plt.title(self.title)
        if self.xlabel:
            plt.xlabel(self.xlabel)
        if self.ylabel:
            plt.ylabel(self.ylabel)

    def box(self, x_values, y_values, is_range=False):
        """Prepare a vertical bar chart.

        NOTE(review): despite the name this draws bars, not a box-and-whisker
        plot; the name is kept for backward compatibility.

        x_values -- tick labels (range strings when ``is_range`` is True).
        y_values -- bar heights.
        """
        self.figure = plt.figure(figsize=(10, 6))
        bar_width = 0.5
        indices = range(len(y_values))

        plt.bar(indices, y_values, width=bar_width, alpha=0.7, color='b')

        if is_range:
            plt.xticks(indices, x_values)  # Use ranges as labels
        else:
            plt.xticks(indices, [str(x) for x in x_values])

        self._apply_common_properties()
        plt.grid(axis='y')

    def line(self, x_values, y_values, is_range=False):
        """Prepare a line plot of ``y_values`` against ``x_values``.

        ``is_range`` is accepted for interface compatibility.  BUG FIX: the
        previous ``if is_range:`` branch rebuilt ``x_values`` with a no-op
        copy comprehension ([m for m in x_values]) that never computed any
        midpoint; it has been removed.  Callers plotting ranged data should
        pass midpoints directly.
        """
        self.figure = plt.figure(figsize=(10, 6))
        plt.plot(x_values, y_values, marker='o')
        self._apply_common_properties()
        plt.grid()

    def scatter(self, x_values, y_values, is_range=False):
        """Prepare a scatter plot of ``y_values`` against ``x_values``.

        ``is_range`` is accepted for interface compatibility; the same no-op
        midpoint comprehension removed from :meth:`line` was removed here.
        """
        self.figure = plt.figure(figsize=(10, 6))
        plt.scatter(x_values, y_values, alpha=0.6, edgecolors='w', s=100)
        self._apply_common_properties()
        plt.grid()

    def pie(self, data, labels):
        """Prepare a pie chart showing the percentage distribution of data."""
        self.figure = plt.figure(figsize=(8, 8))
        plt.pie(data, labels=labels, autopct='%1.1f%%', startangle=140)
        if self.title:
            plt.title(self.title)

    def heatmap(self, data, annot=True, cmap='viridis'):
        """Prepare a heatmap for visualizing 2D matrix data.

        data  -- 2D array-like accepted by ``plt.imshow``/``np.ndenumerate``.
        annot -- when True, write each cell's value at its position.
        cmap  -- matplotlib colormap name.
        """
        self.figure = plt.figure(figsize=(12, 8))
        plt.imshow(data, cmap=cmap, aspect='auto')
        if annot:
            for (i, j), val in np.ndenumerate(data):
                plt.text(j, i, f'{val}', ha='center', va='center', color='white')
        self._apply_common_properties()
        plt.colorbar()  # Show color scale.

    def venn(self, sets, set_labels):
        """Prepare a Venn diagram for the overlap of two or three sets.

        Raises ValueError for any other number of sets.
        """
        self.figure = plt.figure(figsize=(8, 8))
        if len(sets) == 2:
            venn2(sets, set_labels)
        elif len(sets) == 3:
            venn3(sets, set_labels)
        else:
            raise ValueError("Only 2 or 3 sets can be displayed in a Venn diagram.")
        if self.title:
            plt.title(self.title)

    def pareto(self, data, labels):
        """Prepare a Pareto chart: descending bars plus a cumulative-% line.

        NOTE(review): ties in ``data`` are broken by comparing the paired
        labels, so labels must be mutually comparable.
        """
        # Sort values (and their labels) in descending order.
        sorted_data = sorted(zip(data, labels), reverse=True)
        data, labels = zip(*sorted_data)
        cumulative_percentage = [sum(data[:i + 1]) / sum(data) * 100 for i in range(len(data))]

        self.figure, ax1 = plt.subplots(figsize=(12, 8))
        ax1.bar(labels, data, color='b', alpha=0.6)
        ax1.set_xlabel(self.xlabel)
        ax1.set_ylabel(self.ylabel)

        # Plot cumulative percentage line on a secondary y-axis.
        ax2 = ax1.twinx()
        ax2.plot(labels, cumulative_percentage, color='r', marker='D', linestyle='-', linewidth=2)
        ax2.set_ylabel('Cumulative Percentage')
        ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y:.0f}%'))

        if self.title:
            ax1.set_title(self.title)

    def show(self):
        """Display the prepared chart, or print a notice when none exists."""
        if self.figure:
            plt.show()
        else:
            print("No chart has been prepared. Please call a chart method first.")

pythistic/FrequencyTable.py

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
import numpy as np
2+
3+
# Global Variable Used in Frequency Table Data Processing
# NOTE(review): these module-level lists are shared scratch buffers for
# FrequencyTable; FrequencyTable.reset() clears them before each populate
# pass, so using several tables concurrently would interleave their state.
top = []  # Highest value of each class
bottom = []  # Lowest value of each class
top_limit = []  # Upper class limit (highest value + 0.5)
bottom_limit = []  # Lower class limit (lowest value - 0.5)
frequency = []  # Frequency of each class
data_range = []  # "low ~ high" label per class
data_limit = []  # "low_limit ~ high_limit" label per class
data_midpoint = []  # Midpoint of each class
bot_cumulative_frequency = []  # Frequency of data below each class
top_cumulative_frequency = []  # Frequency of data above each class
relative_frequency = []  # Relative frequency per class, in percent
mode = []  # Class label(s) with the highest frequency
16+
17+
# Frequency Table Class
18+
class FrequencyTable:
    """Build simple or grouped frequency tables from a dataset.

    After calling :meth:`PopulateSimple` / :meth:`PopulateGrouped`, results
    are exposed as ``self.simple`` / ``self.grouped`` (ProcessedData objects).
    NOTE(review): intermediate results accumulate in module-level lists that
    :meth:`reset` clears, so instances are not safe to populate concurrently.
    Assumes a non-empty dataset.
    """

    def __init__(self, dataset):
        """Pre-compute descriptive statistics for *dataset*.

        Raises ValueError when the dataset mixes numeric and string values.
        For all-string datasets the numeric statistics below are skipped
        (``lowest``/``highest`` stay ``None``).
        """
        # Check for mixed data types (both numeric and string)
        if any(isinstance(item, str) for item in dataset) and any(isinstance(item, (int, float)) for item in dataset):
            raise ValueError("Data is corrupted: contains both numeric and string values.")

        # Data Initiation
        self.dataset = sorted(dataset)
        self.length = len(dataset)
        self.lowest = min(dataset) if isinstance(dataset[0], (int, float)) else None
        self.highest = max(dataset) if isinstance(dataset[0], (int, float)) else None

        if self.lowest is not None:  # Only calculate classes for numeric data
            # Number of classes via Sturges' rule, rounded down.
            # BUG FIX: the coefficient was mistyped as 3.222; Sturges'
            # formula is 1 + 3.322 * log10(n).
            self.classes = 1 + (3.322 * np.log10(self.length))
            self.classes = round(self.classes - 0.5)

            # Sum of the data and range
            self.sum = sum(dataset)
            self.range = self.highest - self.lowest

            # Interval (class width) is rounded up.
            # NOTE(review): round(x + 0.5) is only an approximation of
            # ceil(x) for exact-integer x, where it rounds up one extra.
            self.interval = self.range / self.classes
            self.interval = round(self.interval + 0.5)

            # Round both chart limits to multiples of 5 so the table reads
            # more cleanly.
            self.base = self.roundy(self.lowest - 3)
            self.top = self.roundy(self.highest + 3)

            # Mean or Average
            self.mean = (self.sum / self.length)

            # Population variance (divisor n)
            self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length

            # Standard deviation
            self.deviation = (self.variance ** 0.5)

            # Sample skewness (adjusted Fisher-Pearson); requires n > 2 and
            # non-zero deviation.
            self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \
                sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)

            # Sample excess kurtosis; requires n > 3 and non-zero deviation.
            self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) /
                             ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
                            (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))

    # Base 5 Rounding
    def roundy(self, x, base=5):
        """Round *x* to the nearest multiple of *base* (default 5)."""
        return base * round(x / base)

    # Function to Reset Frequency Table Data
    def reset(self):
        """Clear the shared module-level scratch lists before repopulating."""
        global top, bottom, top_limit, bottom_limit, frequency
        global data_range, data_limit, data_midpoint
        global bot_cumulative_frequency, top_cumulative_frequency, relative_frequency, mode

        top.clear()
        bottom.clear()
        top_limit.clear()
        bottom_limit.clear()
        frequency.clear()
        data_range.clear()
        data_limit.clear()
        data_midpoint.clear()
        bot_cumulative_frequency.clear()
        top_cumulative_frequency.clear()
        relative_frequency.clear()
        mode.clear()

    # Function To Find Frequency in Dataset with Desired Range (Top and Down Limit)
    def find_frequency(self, bot, top):
        """Count dataset values in [bot, top) by exact-value matching.

        NOTE(review): the decimal branch steps by 0.01 and matches values
        rounded to 2 decimals, so it only counts data recorded to at most
        two decimal places; float-step accumulation may drift slightly.
        """
        total_frequency = 0
        # Check if the dataset contains only integers
        is_integer_data = all(isinstance(x, int) for x in self.dataset)

        if is_integer_data:
            # Loop for integers
            for i in range(bot, top):
                frequency = self.dataset.count(i)
                total_frequency += frequency
        else:
            # Loop for decimals
            current = bot
            while current < top:
                frequency = self.dataset.count(round(current, 2))  # Round for matching
                total_frequency += frequency
                current += 0.01  # Increment by 0.01 for decimals
        return total_frequency

    # Populate Grouped Table Frequency Data Method
    def PopulateGrouped(self):
        """Build a grouped (class-interval) frequency table into self.grouped.

        Numeric data only; prints an error (without raising) for text data.
        """
        try:
            # Check if the dataset contains text
            if any(isinstance(item, str) for item in self.dataset):
                raise ValueError("Text data is not allowed for grouped frequency tables. Please provide numeric data only.")

            self.reset()  # Reset the frequency table data before processing

            # Initiating Used Parameter for Frequency Table
            old_number = 0
            interval = self.interval
            current_number = self.base - 1
            current_top_cumulative_frequency = 1

            # Keep emitting classes until no data remains above the current
            # class (top cumulative frequency reaches zero).
            while current_top_cumulative_frequency != 0:
                # Finding Class Lowest Value
                old_number = current_number + 1
                bottom.append(old_number)

                # Finding Class Highest Value
                current_number = current_number + interval
                top.append(current_number)

                # Append Class Bottom Limit
                current_bottom_limit = old_number - 0.5
                bottom_limit.append(current_bottom_limit)

                # Append Class Top Limit
                current_top_limit = current_number + 0.5
                top_limit.append(current_top_limit)

                # Finding The Frequency in That Range
                current_frequency = self.find_frequency(old_number, current_number + 1)
                frequency.append(current_frequency)

                # Adding The Number Range From Both Frequency
                current_data_range = f"{old_number:.2f} ~ {current_number:.2f}" if not all(isinstance(x, int) for x in self.dataset) else f"{old_number} ~ {current_number}"
                data_range.append(current_data_range)

                # Adding Data Range Limit Of The Class Frequency
                current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}" if not all(isinstance(x, int) for x in self.dataset) else f"{current_bottom_limit} ~ {current_top_limit}"
                data_limit.append(current_data_limit)

                # Adding Data Midpoint of The Class Frequency
                current_data_midpoint = (old_number + current_number) / 2
                data_midpoint.append(current_data_midpoint)

                # Adding Bottom Cumulative Frequency of The Class
                current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number)
                bot_cumulative_frequency.append(current_bot_cumulative_frequency)

                # Adding Top Cumulative Frequency of The Class
                current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1)
                top_cumulative_frequency.append(current_top_cumulative_frequency)

                # Counting the Relative Frequency in Percentage
                current_relative_frequency = np.round((current_frequency / self.length) * 100)
                relative_frequency.append(current_relative_frequency)

            # Find Mode, i.e. the class range(s) with the highest frequency.
            # NOTE(review): this binds a LOCAL `mode` (no `global mode` here);
            # the module-level `mode` list stays empty and only the local is
            # handed to ProcessedData.
            mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
            mode = [data_range[i] for i in mode_index]

            # Append Processed Data into Data Attributes
            self.grouped = ProcessedData(None, bottom, top, bottom_limit, top_limit,
                                         frequency, data_range, data_limit, data_midpoint,
                                         bot_cumulative_frequency, top_cumulative_frequency,
                                         relative_frequency, mode)

        except ValueError as e:
            print(f"Error: {e}")

    # Populate Simple Table Frequency Data Method
    def PopulateSimple(self):
        """Build an ungrouped (per-distinct-value) table into self.simple.

        Works for numeric or all-string datasets.
        """
        self.reset()  # Reset the frequency table data before processing

        # Initialize general variables
        data = sorted(set(self.dataset))  # Remove duplicates and sort the data

        # NOTE(review): these LOCAL lists shadow the module-level ones of the
        # same name, so reset() never clears them — they start fresh here.
        top_limit = []
        bottom_limit = []

        # Single loop to process both numeric and string data
        for current_class in data:
            # Calculate the frequency of the current class
            current_frequency = self.dataset.count(current_class)
            frequency.append(current_frequency)

            # Calculate the relative frequency for the current class
            current_relative_frequency = np.round((current_frequency / self.length) * 100)
            relative_frequency.append(current_relative_frequency)

            # If the data is numeric, calculate limits and cumulative frequencies
            if not all(isinstance(item, str) for item in self.dataset):
                # Calculate top and bottom limits for numeric data
                current_top_limit = current_class + 0.5
                current_bottom_limit = current_class - 0.5
                top_limit.append(current_top_limit)
                bottom_limit.append(current_bottom_limit)

                # Calculate bottom cumulative frequency for numeric data
                current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, current_class)
                bot_cumulative_frequency.append(current_bot_cumulative_frequency)

                # Calculate top cumulative frequency for numeric data
                current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
                top_cumulative_frequency.append(current_top_cumulative_frequency)

            else:
                # If the data is string-based, calculate cumulative frequencies
                # Calculate bottom cumulative frequency for strings
                current_bot_cumulative_frequency = self.dataset.count(current_class)
                bot_cumulative_frequency.append(current_bot_cumulative_frequency)

                # Calculate top cumulative frequency for strings
                current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
                top_cumulative_frequency.append(current_top_cumulative_frequency)

        # Find the mode (the class with the highest frequency); binds a local
        # `mode`, see the NOTE in PopulateGrouped.
        mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
        mode = [data[i] for i in mode_index]

        # Create the ProcessedData object based on the data type
        self.simple = ProcessedData(
            data, None, None, bottom_limit, top_limit,
            frequency, None, None, None,
            bot_cumulative_frequency, top_cumulative_frequency,
            relative_frequency, mode
        )
241+
242+
# Processed Data Assignment
243+
class ProcessedData:
    """Plain container for the outputs of FrequencyTable population methods.

    Abbreviation key used in the parameter names:
    Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C),
    Relative (R).
    """

    def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
        self.classval = data
        self.bottom, self.top = bot, top
        self.bottom_limit, self.top_limit = bot_L, top_L
        self.midpoint = M
        self.ranges, self.limit = R, L
        self.frequency = F
        self.bottom_cumulative_frequency = bot_CF
        self.top_cumulative_frequency = top_CF
        self.relative_frequency = RF
        # Pre-render the relative frequencies as percentage strings.
        self.percentage_relative_frequency = [
            "{:.2f}%".format(value) for value in self.relative_frequency
        ]
        self.mode = mode

0 commit comments

Comments
 (0)