ngupta23
diff --git a/‎README.md‎
Lines changed: 6 additions & 1 deletion b/‎README.md‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎build/lib/more/scikit_helper/cluster/AgglomerativeHelper.py‎
Lines changed: 19 additions & 0 deletions b/‎build/lib/more/scikit_helper/cluster/AgglomerativeHelper.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎build/lib/more/scikit_helper/cluster/BaseClusterWithN.py‎
Lines changed: 96 additions & 0 deletions b/‎build/lib/more/scikit_helper/cluster/BaseClusterWithN.py‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎build/lib/more/scikit_helper/cluster/GaussianMixture.py‎
Lines changed: 0 additions & 1 deletion b/‎build/lib/more/scikit_helper/cluster/GaussianMixture.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎build/lib/more/scikit_helper/cluster/KMeansHelper.py‎
Lines changed: 19 additions & 0 deletions b/‎build/lib/more/scikit_helper/cluster/KMeansHelper.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎build/lib/more/scikit_helper/cluster/__init__.py‎
Lines changed: 3 additions & 0 deletions b/‎build/lib/more/scikit_helper/cluster/__init__.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎dist/more-0.0.1b8-py3-none-any.whl‎
24.4 KB b/‎dist/more-0.0.1b8-py3-none-any.whl‎
24.4 KB
diff --git a/‎dist/more-0.0.1b8.tar.gz‎
15.2 KB b/‎dist/more-0.0.1b8.tar.gz‎
15.2 KB
diff --git a/‎examples/scikit-helper/.ipynb_checkpoints/KMeansHelper-checkpoint.ipynb‎
Lines changed: 68 additions & 0 deletions b/‎examples/scikit-helper/.ipynb_checkpoints/KMeansHelper-checkpoint.ipynb‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎examples/scikit-helper/KMeansHelper.ipynb‎
Lines changed: 68 additions & 0 deletions b/‎examples/scikit-helper/KMeansHelper.ipynb‎
Lines changed: 68 additions & 0 deletions
@@ -4,7 +4,8 @@ This is a helper package for a variety of functions as described in the Overview
 
 # Installation
 
-pip install more==0.0.1b7
+* For a standard installation: `pip install more`
+* To install a particular version: `pip install more==0.0.1b8`
 
 # Overview
 
@@ -18,6 +19,10 @@ Check out the  [examples](https://github.com/ngupta23/more/tree/master/examples)
 
 # Version History
 
+## 0.0.1b8
+
+* Added Cluster Helpers for KMeans and Agglomerative Clustering
+
 ## 0.0.1b7
 
 * Added Cluster Helper for Gaussian Clusters
 
@@ -0,0 +1,19 @@
+from sklearn.cluster import AgglomerativeClustering
+from . import BaseClusterWithN
+
+class AgglomerativeHelper(BaseClusterWithN):
+    """
+    Class to train and evaluate an Agglomerative (Hierarchical) Cluster Model
+    """
+
+    def __init__(self, X, n_clusters, linkage = "ward", scaled = True, random_state = 101):
+        """
+        Build the helper around sklearn's AgglomerativeClustering.
+
+        Parameters
+        ----------
+        X : data to cluster; handed to the base class unchanged
+        n_clusters : number of clusters, passed to AgglomerativeClustering
+        linkage : linkage criterion, passed to AgglomerativeClustering
+        scaled : forwarded to the base class, which uses it to choose between
+            the standardized and the raw data
+        random_state : forwarded to the base class
+        """
+        # `scaled` must be forwarded explicitly; otherwise the base class
+        # default (True) silently overrides the caller's choice.
+        super().__init__(X=X, n_clusters=n_clusters, scaled=scaled, random_state=random_state)
+        self.linkage = linkage
+        self.cluster_obj = AgglomerativeClustering(n_clusters=self.n_clusters, linkage=self.linkage)
+        
+        
+                    
+    
+        
+    
+        
+            
@@ -0,0 +1,96 @@
+import warnings
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from sklearn import metrics
+from more import viz_helper as vh
+
+class BaseClusterWithN:
+    """
+    Base helper for clustering models that take the number of clusters up front.
+
+    Child classes are expected to assign ``self.cluster_obj`` an estimator that
+    provides ``fit`` and exposes ``labels_`` once fitted.
+    """
+
+    def __init__(self, X, n_clusters, scaled = True, random_state = 101):
+        """
+        Store the data and settings and pre-compute a standardized copy of X.
+
+        Parameters
+        ----------
+        X : pandas DataFrame of features (its index is reset and dropped)
+        n_clusters : number of clusters, stored for the child class to use
+        scaled : if True, train() and evaluate() use the standardized data,
+            otherwise they use the raw data
+        random_state : stored and passed to the silhouette score computation
+        """
+        self.X = X.reset_index(drop=True)
+        self.n_clusters = n_clusters
+        self.scaled = scaled
+        self.random_state = random_state
+        self.cluster_obj = None  # Define in child class
+        self.labels = None
+        # NOTE: the attribute name keeps its historical spelling so that
+        # existing callers reading `silhoutte_score` keep working.
+        self.silhoutte_score = None
+        self.merged_data = None
+        self.merged_scaled_data = None
+        self.columns = self.X.columns
+
+        # The standardized copy is always built, regardless of `scaled`,
+        # because the scaled summaries and plots rely on it.
+        std_scl = StandardScaler()
+        self.X_scaled = pd.DataFrame(std_scl.fit_transform(self.X), columns=self.columns)
+
+    def train(self, merge = True):
+        """
+        Train the clustering method
+
+        Fits ``self.cluster_obj`` on the scaled or raw data (per ``self.scaled``)
+        and stores the resulting labels. When ``merge`` is True the labels are
+        also attached to the raw and scaled data frames.
+
+        Returns self so that method calls can be chained.
+        """
+        if self.scaled:
+            self.cluster_obj.fit(self.X_scaled)
+        else:
+            self.cluster_obj.fit(self.X)
+
+        self.labels = self.cluster_obj.labels_
+
+        if merge:
+            self.merge_data_labels()
+
+        return self  # Allows to cascade methods
+
+    def evaluate(self, metric = "silhoutte"):
+        """
+        Provides the Goodness of Fit Statistics for the clustering algorithm
+
+        Only the silhouette coefficient is supported. It is stored in
+        ``self.silhoutte_score`` and printed. Any other metric name triggers a
+        warning and leaves the stored score untouched.
+        """
+        # "silhoutte" is the historical (misspelled) name and remains the
+        # default; the correct spelling is accepted as well.
+        if metric not in ("silhoutte", "silhouette"):
+            warnings.warn("Metric {} is not supported".format(metric))
+            # Nothing was computed, so do not print a stale or empty score.
+            return
+
+        data = self.X_scaled if self.scaled else self.X
+        self.silhoutte_score = metrics.silhouette_score(data, self.labels, random_state=self.random_state)
+        print("Silhouette Coefficient: {}".format(self.silhoutte_score))
+
+    def merge_data_labels(self):
+        """
+        Attach the cluster labels as a 'labels' column to the raw and scaled data
+        """
+        self.merged_data = pd.concat([self.X, pd.Series(self.labels, name='labels')], axis=1)
+        self.merged_scaled_data = pd.concat([self.X_scaled, pd.Series(self.labels, name='labels')], axis=1)
+
+    def cluster_obs_count(self):
+        """
+        Gives the number of observations in each cluster
+        """
+        # count() is per column; the first row of the transpose is the count
+        # taken on the first feature column.
+        return self.merged_data.groupby('labels').count().transpose().iloc[0, :]
+
+    def cluster_means(self):
+        """
+        Provides the means of the cluster features for each cluster
+        """
+        return self.merged_data.groupby('labels').mean().transpose()
+
+    def cluster_means_scaled(self):
+        """
+        Provides the means (scaled) of the cluster features for each cluster
+        """
+        # Must read the scaled frame; the raw frame would just repeat
+        # cluster_means().
+        return self.merged_scaled_data.groupby('labels').mean().transpose()
+
+    def plot_parallel_coordinates(self, scaled=True, frac=0.05, figsize=(12,6), xrot=0):
+        """
+        Plot the parallel coordinate plots for the features in each cluster
+
+        ``scaled`` selects the scaled or the raw merged data; the remaining
+        arguments are passed through to ``vh.plot_parallel_coordinates``.
+        """
+        data = self.merged_scaled_data if scaled else self.merged_data
+        vh.plot_parallel_coordinates(data=data, by='labels', normalize=False, frac=frac, figsize=figsize, xrot=xrot)
+            
+        
+    
+        
+        
+            
+        
+        
+    
+        
+            
@@ -1,5 +1,4 @@
 import numpy as np
-import pandas as pd
 import matplotlib.pyplot as plt
 
 # this code has been manipulated from the source available on sklearn's website documentation
 
@@ -0,0 +1,19 @@
+from sklearn.cluster import KMeans
+from . import BaseClusterWithN
+
+class KMeansHelper(BaseClusterWithN):
+    """
+    Class to train and evaluate a KMeans Cluster Model
+    """
+
+    def __init__(self, X, n_clusters, init = "k-means++", n_jobs = None, scaled = True, random_state = 101):
+        """
+        Build the helper around sklearn's KMeans.
+
+        Parameters
+        ----------
+        X : data to cluster; handed to the base class unchanged
+        n_clusters : number of clusters, passed to KMeans
+        init : initialization method, passed to KMeans
+        n_jobs : passed to KMeans only when it is not None (see note below)
+        scaled : forwarded to the base class, which uses it to choose between
+            the standardized and the raw data
+        random_state : forwarded to the base class and passed to KMeans
+        """
+        # `scaled` must be forwarded explicitly; otherwise the base class
+        # default (True) silently overrides the caller's choice.
+        super().__init__(X=X, n_clusters=n_clusters, scaled=scaled, random_state=random_state)
+        self.init = init
+        self.n_jobs = n_jobs
+
+        kmeans_kwargs = {
+            "n_clusters": self.n_clusters,
+            "init": self.init,
+            "random_state": self.random_state,
+        }
+        # KMeans deprecated `n_jobs` in scikit-learn 0.23 and removed it in
+        # 1.0. Forward it only when the caller set it, so the default
+        # construction works on both old and new releases.
+        if self.n_jobs is not None:
+            kmeans_kwargs["n_jobs"] = self.n_jobs
+        self.cluster_obj = KMeans(**kmeans_kwargs)
+        
+                    
+    
+        
+    
+        
+            
@@ -1 +1,4 @@
 from .GaussianMixture import GaussianMixtureHelper 
+from .BaseClusterWithN import BaseClusterWithN 
+from .KMeansHelper import KMeansHelper
+from .AgglomerativeHelper import AgglomerativeHelper
@@ -0,0 +1,68 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.append(\"C:/Users/Nikhil/Google Drive Work/MyPythonLibraries/more/more\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scikit_helper.cluster import KMeansHelper as kmh"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Incomplete example - TBD\n",
+    "#kmeans = kmh()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
@@ -0,0 +1,68 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.append(\"C:/Users/Nikhil/Google Drive Work/MyPythonLibraries/more/more\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scikit_helper.cluster import KMeansHelper as kmh"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Incomplete example - TBD\n",
+    "#kmeans = kmh()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,4 @@`
`1`	`1`	`import numpy as np`
`2`		`-import pandas as pd`
`3`	`2`	`import matplotlib.pyplot as plt`
`4`	`3`
`5`	`4`	`# this code has been manipulated from the source available on sklearn's website documentation`