Skip to content

Commit ac86a60

Browse files
committed
0.0.1b8 release
1 parent e0ef4bc commit ac86a60

23 files changed

+427
-6
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ This is a helper package for a variety of functions as described in the Overview
44

55
# Installation
66

7-
pip install more==0.0.1b7
7+
* For standard installation: `pip install more`
8+
* For installing a particular version: `pip install more==0.0.1b8`
89

910
# Overview
1011

@@ -18,6 +19,10 @@ Check out the [examples](https://github.com/ngupta23/more/tree/master/examples)
1819

1920
# Version History
2021

22+
## 0.0.1b8
23+
24+
* Added Cluster Helpers for KMeans and Agglomerative Clustering
25+
2126
## 0.0.1b7
2227

2328
* Added Cluster Helper for Gaussian Clusters
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from sklearn.cluster import AgglomerativeClustering
2+
from . import BaseClusterWithN
3+
4+
class AgglomerativeHelper(BaseClusterWithN):
    """
    Helper that wires sklearn's AgglomerativeClustering into BaseClusterWithN.
    """

    def __init__(self, X, n_clusters, linkage="ward", scaled=True, random_state=101):
        """
        Class to train and evaluate an Agglomerative (Hierarchical) Cluster Model

        X: feature data, forwarded to BaseClusterWithN
        n_clusters: number of clusters, forwarded to AgglomerativeClustering
        linkage: linkage criterion, forwarded to AgglomerativeClustering
        scaled: forwarded to BaseClusterWithN
        random_state: forwarded to BaseClusterWithN (it is not passed to
            AgglomerativeClustering here)
        """
        # `scaled` must be forwarded explicitly; otherwise the base class
        # falls back to its own default and the caller's choice is ignored.
        super().__init__(
            X=X,
            n_clusters=n_clusters,
            scaled=scaled,
            random_state=random_state,
        )
        self.linkage = linkage
        self.cluster_obj = AgglomerativeClustering(
            n_clusters=self.n_clusters,
            linkage=self.linkage,
        )
12+
13+
14+
15+
16+
17+
18+
19+
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import warnings
2+
import pandas as pd
3+
from sklearn.preprocessing import StandardScaler
4+
from sklearn import metrics
5+
from more import viz_helper as vh
6+
7+
class BaseClusterWithN:
    """
    Base helper for clustering models that take the number of clusters up front.

    Child classes are expected to assign ``self.cluster_obj``; ``train`` calls
    its ``fit`` method and reads its ``labels_`` attribute.
    """

    def __init__(self, X, n_clusters, scaled=True, random_state=101):
        """
        Class to train and evaluate a Base Cluster Class with Number of Clusters Specified

        X: DataFrame of features (its index is reset to 0..n-1)
        n_clusters: number of clusters, stored for use by child classes
        scaled: if True, train() and evaluate() use the standardized copy of X
        random_state: stored and passed to the silhouette score computation
        """
        # A fresh 0..n-1 index lets the labels Series (default index) line up
        # row-by-row with the data in merge_data_labels().
        self.X = X.reset_index(drop=True)
        self.n_clusters = n_clusters
        self.scaled = scaled
        self.random_state = random_state
        self.cluster_obj = None  # Define in child class
        self.labels = None
        self.silhoutte_score = None  # (sic) attribute name kept for existing callers
        self.merged_data = None
        self.merged_scaled_data = None
        self.columns = self.X.columns

        # The scaled copy is always built, regardless of `scaled`, because
        # merge_data_labels() and the scaled reports rely on it.
        std_scl = StandardScaler()
        self.X_scaled = pd.DataFrame(std_scl.fit_transform(self.X), columns=self.columns)

    def train(self, merge=True):
        """
        Train the clustering method

        merge: if True, also build the merged (data + labels) frames
        Returns self so that calls can be chained.
        """
        training_data = self.X_scaled if self.scaled else self.X
        self.cluster_obj.fit(training_data)

        self.labels = self.cluster_obj.labels_

        if merge:
            self.merge_data_labels()

        return self  # Allows to cascade methods

    def evaluate(self, metric="silhoutte"):
        """
        Provides the Goodness of Fit Statistics for the clustering algorithm

        metric: "silhoutte" (legacy spelling) or "silhouette". Any other value
            triggers a warning and leaves the stored score untouched.
        """
        if metric not in ("silhoutte", "silhouette"):
            warnings.warn("Metric {} is not supported".format(metric))
            # Nothing was computed, so do not print a (stale or None) score.
            return

        scoring_data = self.X_scaled if self.scaled else self.X
        self.silhoutte_score = metrics.silhouette_score(
            scoring_data, self.labels, random_state=self.random_state
        )
        print("Silhouette Coefficient: {}".format(self.silhoutte_score))

    def merge_data_labels(self):
        """
        Attach the cluster labels as a 'labels' column to the raw and scaled data
        """
        self.merged_data = pd.concat(
            [self.X, pd.Series(self.labels, name='labels')], axis=1
        )
        self.merged_scaled_data = pd.concat(
            [self.X_scaled, pd.Series(self.labels, name='labels')], axis=1
        )

    def cluster_obs_count(self):
        """
        Gives the number of observations in each cluster
        """
        # count() is per column; the first feature's row is used as the count.
        return self.merged_data.groupby('labels').count().transpose().iloc[0, :]

    def cluster_means(self):
        """
        Provides the means of the cluster features for each cluster
        """
        return self.merged_data.groupby('labels').mean().transpose()

    def cluster_means_scaled(self):
        """
        Provides the means (scaled) of the cluster features for each cluster
        """
        # Must read the scaled frame; the unscaled one is cluster_means().
        return self.merged_scaled_data.groupby('labels').mean().transpose()

    def plot_parallel_coordinates(self, scaled=True, frac=0.05, figsize=(12, 6), xrot=0):
        """
        Plot the parallel coordinate plots for the features in each cluster

        scaled: if True, plot the scaled merged data, otherwise the raw merged data
        frac, figsize, xrot: forwarded unchanged to vh.plot_parallel_coordinates
        """
        plot_data = self.merged_scaled_data if scaled else self.merged_data
        vh.plot_parallel_coordinates(
            data=plot_data,
            by='labels',
            normalize=False,
            frac=frac,
            figsize=figsize,
            xrot=xrot,
        )
86+
87+
88+
89+
90+
91+
92+
93+
94+
95+
96+

build/lib/more/scikit_helper/cluster/GaussianMixture.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import numpy as np
2-
import pandas as pd
32
import matplotlib.pyplot as plt
43

54
# this code has been manipulated from the source available on sklearn's website documentation
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from sklearn.cluster import KMeans
2+
from . import BaseClusterWithN
3+
4+
class KMeansHelper(BaseClusterWithN):
    """
    Helper that wires sklearn's KMeans into BaseClusterWithN.
    """

    def __init__(self, X, n_clusters, init="k-means++", n_jobs=None, scaled=True, random_state=101):
        """
        Class to train and evaluate a KMeans Cluster Model

        X: feature data, forwarded to BaseClusterWithN
        n_clusters: number of clusters, forwarded to KMeans
        init: initialization method, forwarded to KMeans
        n_jobs: forwarded to KMeans only when it is not None
        scaled: forwarded to BaseClusterWithN
        random_state: forwarded to both BaseClusterWithN and KMeans
        """
        # `scaled` must be forwarded explicitly; otherwise the base class
        # falls back to its own default and the caller's choice is ignored.
        super().__init__(
            X=X,
            n_clusters=n_clusters,
            scaled=scaled,
            random_state=random_state,
        )
        self.init = init
        self.n_jobs = n_jobs

        kmeans_kwargs = {
            "n_clusters": self.n_clusters,
            "init": self.init,
            "random_state": self.random_state,
        }
        # NOTE(review): newer scikit-learn releases removed KMeans' `n_jobs`
        # argument; passing it only when explicitly requested keeps the
        # default call working on both old and new versions.
        if self.n_jobs is not None:
            kmeans_kwargs["n_jobs"] = self.n_jobs
        self.cluster_obj = KMeans(**kmeans_kwargs)
13+
14+
15+
16+
17+
18+
19+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
11
from .GaussianMixture import GaussianMixtureHelper
2+
from .BaseClusterWithN import BaseClusterWithN
3+
from .KMeansHelper import KMeansHelper
4+
from .AgglomerativeHelper import AgglomerativeHelper

dist/more-0.0.1b8-py3-none-any.whl

24.4 KB
Binary file not shown.

dist/more-0.0.1b8.tar.gz

15.2 KB
Binary file not shown.
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 3,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import sys\n",
10+
"sys.path.append(\"C:/Users/Nikhil/Google Drive Work/MyPythonLibraries/more/more\")"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 4,
16+
"metadata": {},
17+
"outputs": [],
18+
"source": [
19+
"from scikit_helper.cluster import KMeansHelper as kmh"
20+
]
21+
},
22+
{
23+
"cell_type": "code",
24+
"execution_count": 6,
25+
"metadata": {},
26+
"outputs": [],
27+
"source": [
28+
"# Incomplete example - TBD\n",
29+
"#kmeans = kmh()"
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"metadata": {},
36+
"outputs": [],
37+
"source": []
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {},
43+
"outputs": [],
44+
"source": []
45+
}
46+
],
47+
"metadata": {
48+
"kernelspec": {
49+
"display_name": "Python 3",
50+
"language": "python",
51+
"name": "python3"
52+
},
53+
"language_info": {
54+
"codemirror_mode": {
55+
"name": "ipython",
56+
"version": 3
57+
},
58+
"file_extension": ".py",
59+
"mimetype": "text/x-python",
60+
"name": "python",
61+
"nbconvert_exporter": "python",
62+
"pygments_lexer": "ipython3",
63+
"version": "3.6.8"
64+
}
65+
},
66+
"nbformat": 4,
67+
"nbformat_minor": 2
68+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 3,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import sys\n",
10+
"sys.path.append(\"C:/Users/Nikhil/Google Drive Work/MyPythonLibraries/more/more\")"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 4,
16+
"metadata": {},
17+
"outputs": [],
18+
"source": [
19+
"from scikit_helper.cluster import KMeansHelper as kmh"
20+
]
21+
},
22+
{
23+
"cell_type": "code",
24+
"execution_count": 6,
25+
"metadata": {},
26+
"outputs": [],
27+
"source": [
28+
"# Incomplete example - TBD\n",
29+
"#kmeans = kmh()"
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"metadata": {},
36+
"outputs": [],
37+
"source": []
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {},
43+
"outputs": [],
44+
"source": []
45+
}
46+
],
47+
"metadata": {
48+
"kernelspec": {
49+
"display_name": "Python 3",
50+
"language": "python",
51+
"name": "python3"
52+
},
53+
"language_info": {
54+
"codemirror_mode": {
55+
"name": "ipython",
56+
"version": 3
57+
},
58+
"file_extension": ".py",
59+
"mimetype": "text/x-python",
60+
"name": "python",
61+
"nbconvert_exporter": "python",
62+
"pygments_lexer": "ipython3",
63+
"version": "3.6.8"
64+
}
65+
},
66+
"nbformat": 4,
67+
"nbformat_minor": 2
68+
}

0 commit comments

Comments
 (0)