1+ """
2+ README, Author - Md Ruman Islam (mailto:ruman23.github.io)
3+ Requirements:
4+ - numpy
5+ - matplotlib
6+ Python:
7+ - 3.8+
8+ Inputs:
9+ - X : a 2D numpy array of features.
10+ - n_components : number of Gaussian distributions (clusters) to fit.
11+ - max_iter : maximum number of EM iterations.
12+ - tol : convergence tolerance.
13+ Usage:
14+ 1. define 'n_components' value and 'X' features array
15+ 2. initialize model:
16+ gmm = GaussianMixture(n_components=3, max_iter=100)
17+ 3. fit model to data:
18+ gmm.fit(X)
19+ 4. get cluster predictions:
20+ labels = gmm.predict(X)
21+ 5. visualize results:
22+ gmm.plot_results(X)
23+ """

import warnings

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal

warnings.filterwarnings("ignore")

TAG = "GAUSSIAN-MIXTURE/ "


class GaussianMixture:
    """
    Gaussian Mixture Model implemented using the Expectation-Maximization (EM)
    algorithm.
    """

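    # The model represents the data density as a weighted sum of Gaussians,
    #     p(x) = sum_k weights_[k] * N(x | means_[k], covariances_[k]),
    # and is fit by EM: alternate an E-step (compute responsibilities) with
    # an M-step (re-estimate weights, means, and covariances).
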
    def __init__(self, n_components=2, max_iter=100, tol=1e-4, seed=None):
        self.n_components = n_components
        self.max_iter = max_iter
        self.tol = tol
        self.seed = seed

        # parameters learned during fitting
        self.weights_ = None
        self.means_ = None
        self.covariances_ = None
        self.log_likelihoods_ = []

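    # Note: the initialization below is a simple heuristic. Means are seeded
    # from randomly chosen data points and every covariance starts at the
    # covariance of the full data set; k-means seeding (scikit-learn's default
    # for its GaussianMixture) is a common alternative.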
    def _initialize_parameters(self, X):
        """Randomly initialize means, covariances, and mixture weights."""
        rng = np.random.default_rng(self.seed)
        n_samples, n_features = X.shape

        indices = rng.choice(n_samples, self.n_components, replace=False)
        self.means_ = X[indices]

        self.covariances_ = np.array(
            [np.cov(X, rowvar=False) for _ in range(self.n_components)]
        )
        self.weights_ = np.ones(self.n_components) / self.n_components

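    # The E-step computes responsibilities via Bayes' rule on the latent
    # component assignment:
    #     gamma[i, k] = weights_[k] * N(x_i | means_[k], covariances_[k])
    #                   / sum_j(weights_[j] * N(x_i | means_[j], covariances_[j]))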
    def _e_step(self, X):
        """Compute responsibilities (posterior probabilities)."""
        n_samples = X.shape[0]
        responsibilities = np.zeros((n_samples, self.n_components))

        for k in range(self.n_components):
            rv = multivariate_normal(mean=self.means_[k], cov=self.covariances_[k])
            responsibilities[:, k] = self.weights_[k] * rv.pdf(X)

        # Normalize to get probabilities; the small epsilon guards against
        # division by zero when a point has negligible density under every
        # component.
        responsibilities /= responsibilities.sum(axis=1, keepdims=True) + 1e-12
        return responsibilities

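    # The M-step re-estimates parameters as responsibility-weighted
    # maximum-likelihood estimates, with Nk = sum_i gamma[i, k]:
    #     weights_[k]     = Nk / n_samples
    #     means_[k]       = sum_i gamma[i, k] * x_i / Nk
    #     covariances_[k] = sum_i gamma[i, k] * (x_i - mu_k)(x_i - mu_k)^T / Nk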
    def _m_step(self, X, responsibilities):
        """Update weights, means, and covariances."""
        n_samples, n_features = X.shape
        Nk = responsibilities.sum(axis=0)

        self.weights_ = Nk / n_samples
        self.means_ = (responsibilities.T @ X) / Nk[:, np.newaxis]

        for k in range(self.n_components):
            diff = X - self.means_[k]
            self.covariances_[k] = (responsibilities[:, k][:, np.newaxis] * diff).T @ diff
            self.covariances_[k] /= Nk[k]
            # Add small regularization term for numerical stability
            self.covariances_[k] += np.eye(n_features) * 1e-6

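    # The quantity EM maximizes: the log-likelihood of the data under the
    # current mixture,
    #     log L = sum_i log(sum_k weights_[k] * N(x_i | means_[k], covariances_[k]))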
    def _compute_log_likelihood(self, X):
        """Compute the total log-likelihood of the model."""
        n_samples = X.shape[0]
        total_pdf = np.zeros((n_samples, self.n_components))

        for k in range(self.n_components):
            rv = multivariate_normal(mean=self.means_[k], cov=self.covariances_[k])
            total_pdf[:, k] = self.weights_[k] * rv.pdf(X)

        log_likelihood = np.sum(np.log(np.sum(total_pdf, axis=1) + 1e-12))
        return log_likelihood

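    # EM never decreases the log-likelihood, so fit() tracks it per iteration
    # and stops once the improvement between iterations falls below `tol`.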
    def fit(self, X):
        """Fit the Gaussian Mixture Model to data using the EM algorithm."""
        self._initialize_parameters(X)

        prev_log_likelihood = None

        for i in range(self.max_iter):
            # E-step
            responsibilities = self._e_step(X)

            # M-step
            self._m_step(X, responsibilities)

            # Log-likelihood
            log_likelihood = self._compute_log_likelihood(X)
            self.log_likelihoods_.append(log_likelihood)

            if prev_log_likelihood is not None:
                if abs(log_likelihood - prev_log_likelihood) < self.tol:
                    print(f"{TAG}Converged at iteration {i}.")
                    break
            prev_log_likelihood = log_likelihood

        print(f"{TAG}Training complete. Final log-likelihood: {log_likelihood:.4f}")

    def predict(self, X):
        """Predict the cluster assignment for each data point."""
        responsibilities = self._e_step(X)
        return np.argmax(responsibilities, axis=1)

    def plot_results(self, X):
        """Visualize GMM clustering results (2D data only)."""
        if X.shape[1] != 2:
            print(f"{TAG}Plotting is only supported for 2D data.")
            return

        labels = self.predict(X)
        plt.scatter(X[:, 0], X[:, 1], c=labels, cmap="viridis", s=30)
        plt.scatter(self.means_[:, 0], self.means_[:, 1], c="red", s=100, marker="x")
        plt.title("Gaussian Mixture Model Clustering")
        plt.xlabel("Feature 1")
        plt.ylabel("Feature 2")
        plt.show()


# Mock test
if __name__ == "__main__":
    from sklearn.datasets import make_blobs

    X, _ = make_blobs(n_samples=300, centers=3, cluster_std=1.2, random_state=42)
    gmm = GaussianMixture(n_components=3, max_iter=100, seed=42)
    gmm.fit(X)
    labels = gmm.predict(X)
    gmm.plot_results(X)
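
    # Optional sanity check: under EM the stored log-likelihood trace should
    # be non-decreasing, so plotting it is a quick way to eyeball convergence.
    # This uses only the log_likelihoods_ attribute defined above.
    plt.plot(gmm.log_likelihoods_)
    plt.title("EM Log-Likelihood per Iteration")
    plt.xlabel("Iteration")
    plt.ylabel("Log-likelihood")
    plt.show()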