99import matplotlib .pyplot as plt
1010import matplotlib .ticker as plticker
1111import numpy as np
12+ from numpy .typing import NDArray
1213import pandas as pd
1314import scipy
1415from mpl_toolkits .axes_grid1 import make_axes_locatable
@@ -155,53 +156,72 @@ def plot_images(
155156
156157
def make_ellipses(
    x: NDArray,
    y: NDArray,
    ax: mpl.axes.Axes,
    n_std: float = 2,
    color: Union[str, Any, Tuple[float, float, float]] = "None",
):
    """
    Draw the covariance confidence ellipse of *x* and *y* on *ax*.

    The ellipse is first built around the origin from the Pearson correlation
    of the two variables, then rotated by 45 degrees, scaled by the standard
    deviation of each variable times ``n_std`` and translated to the mean of
    the data. The patch is added to ``ax`` and the axes aspect is set to
    ``"equal"`` with ``"datalim"`` adjustment.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Both must contain the same number of elements.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        Multiplier applied to the standard deviation of each variable when
        scaling the ellipse, by default 2.

    color : Union[str, Any, Tuple[float, float, float]]
        Face color of the ellipse, by default ``"None"``.

    Returns
    -------
    matplotlib.patches.Ellipse
        The ellipse patch that was added to ``ax``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same size.
    """
    # np.size works for ndarrays, pandas Series and plain sequences alike.
    if np.size(x) != np.size(y):
        raise ValueError("x and y must be the same size")

    cov = np.cov(x, y)
    pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])

    # Axes of the ellipse of the standardized (unit-variance) data.
    # NOTE(review): the matplotlib "confidence ellipse" recipe uses a factor
    # of 2 here (width = 2 * radius); 2.5 draws an ellipse 1.25x larger than
    # n_std standard deviations -- confirm this is intended.
    ell_radius_x = np.sqrt(1 + pearson) * 2.5
    ell_radius_y = np.sqrt(1 - pearson) * 2.5
    ell = mpl.patches.Ellipse(
        (0, 0), width=ell_radius_x, height=ell_radius_y, facecolor=color
    )

    scale_x = np.sqrt(cov[0, 0]) * n_std
    scale_y = np.sqrt(cov[1, 1]) * n_std
    mean_x = np.mean(x)
    mean_y = np.mean(y)

    # Rotate the standardized ellipse, stretch it to the data's spread and
    # move it onto the data's centre, then map into the axes' data coordinates.
    transf = (
        mpl.transforms.Affine2D()
        .rotate_deg(45)
        .scale(scale_x, scale_y)
        .translate(mean_x, mean_y)
    )
    ell.set_transform(transf + ax.transData)
    ax.add_patch(ell)
    ell.set_clip_box(ax.bbox)
    ell.set_alpha(0.4)
    ax.set_aspect("equal", "datalim")
    return ell
186206
187207
def compare_covariances(
    df_1: pd.DataFrame,
    df_2: pd.DataFrame,
    col_x: str,
    col_y: str,
    ax: mpl.axes.Axes,
    label: str = "",
    color: Union[None, str, Tuple[float, float, float], Tuple[float, float, float, float]] = None,
):
    """
    Covariance plot: scatter plot with ellipses

    Both dataframes are drawn as scatter plots of ``col_y`` against ``col_x``
    on the same axes, each with its covariance ellipse drawn by
    ``make_ellipses``. Rows with a missing value in ``col_x`` or ``col_y``
    are ignored.

    Parameters
    ----------
    df_1 : pd.DataFrame
        dataframe with raw data, drawn in black and labelled "original"
    df_2 : pd.DataFrame
        dataframe with imputations, drawn with ``color``
    col_x : str
        variable x, name of the column of both dataframes used for the x axis
    col_y : str
        variable y, name of the column of both dataframes used for the y axis
    ax : matplotlib.axes.Axes
        matplotlib ax handles
    label : str
        legend label of the ``df_2`` scatter; when empty (the default),
        "imputed" is used
    color : Union[None, str, Tuple[float, float, float], Tuple[float, float, float, float]]
        color of the ``df_2`` scatter and ellipse; when None (the default),
        ``tab10(0)`` is used
    """
    # Only the two plotted columns matter: a missing value in an unrelated
    # column must not remove a point from the plot or from the covariance.
    plotted_columns = [col_x, col_y]
    df1 = df_1.dropna(subset=plotted_columns)
    df2 = df_2.dropna(subset=plotted_columns)

    if color is None:
        color = tab10(0)

    ax.scatter(
        df2[col_x],
        df2[col_y],
        marker=".",
        color=color,
        s=2,
        alpha=0.7,
        label=label or "imputed",
    )
    ax.scatter(
        df1[col_x],
        df1[col_y],
        marker=".",
        color="black",
        s=2,
        alpha=0.7,
        label="original",
    )
    make_ellipses(df1[col_x], df1[col_y], ax, color="black")
    make_ellipses(df2[col_x], df2[col_y], ax, color=color)
    ax.set_xlabel(col_x)
    ax.set_ylabel(col_y)
221243
0 commit comments