@@ -226,7 +226,14 @@ class FactorAnalyzer:
226226 rotation_matrix : np.array
227227 The rotation matrix, if a rotation
228228 has been performed.
229-
229+ structure : np.array or None
230+ The structure loading matrix.
231+ This only exists if the rotation
232+ is promax.
233+ psi : np.array or None
234+ The factor correlations
235+ matrix. This only exists
236+ if the rotation is oblique.
230237 Notes
231238 -----
232239 This code was partly derived from the excellent R package
@@ -270,6 +277,9 @@ def __init__(self,
270277 self .loadings = None
271278 self .rotation_matrix = None
272279
280+ self ._scale_mean = None
281+ self ._scale_std = None
282+
273283 @staticmethod
274284 def _fit_uls_objective (psi , corr_mtx , n_factors ):
275285 """
@@ -588,7 +598,7 @@ def fit_factor_analysis(self,
588598 'Check to make sure you do not have any '
589599 'features with zero standard deviation.' )
590600
591- corr = corr .values
601+ corr = corr .copy (). values
592602
593603 # if `use_smc` is True, get squared multiple correlations
594604 # and use these as initial guesses for optimizer
@@ -647,7 +657,6 @@ def analyze(self,
647657 normalize = True ,
648658 impute = 'median' ,
649659 remove_non_numeric = True ,
650- use_scaling = True ,
651660 use_corr_matrix = False ,
652661 ** kwargs ):
653662 """
@@ -706,11 +715,6 @@ def analyze(self,
706715 Remove any non-numeric data. If `use_corr_matrix` is True,
707716 no non-numeric data will be removed.
708717 Defaults to True.
709- use_scaling : bool, optional
710- Whether to scale the data by subtracting out the mean
711- and dividing by the standard deviation. If `use_corr_matrix`
712- is True, scaling will not be performed.
713- Defaults to True.
714718 use_corr_matrix : bool, optional
715719 Set to true if the `data` is the correlation
716720 matrix.
@@ -772,16 +776,11 @@ def analyze(self,
772776 self .corr = df .copy ()
773777 else :
774778 self .corr = df .corr ()
775-
776- # scale the data, if it is not a correlation
777- # matrix and `use_scaling` is True
778- if use_scaling and not use_corr_matrix :
779- X = (df - df .mean (0 )) / df .std (0 )
780- else :
781- X = df .copy ()
779+ self ._scale_mean = df .mean (0 )
780+ self ._scale_std = df .std (0 )
782781
783782 # fit factor analysis model
784- loadings = self .fit_factor_analysis (X ,
783+ loadings = self .fit_factor_analysis (df . copy () ,
785784 n_factors ,
786785 use_smc ,
787786 bounds ,
@@ -883,7 +882,7 @@ def get_eigenvalues(self):
883882 """
884883 if (self .corr is not None and self .loadings is not None ):
885884
886- corr = self .corr .values
885+ corr = self .corr .copy (). values
887886
888887 e_values , _ = sp .linalg .eigh (corr )
889888 e_values = pd .DataFrame (e_values [::- 1 ],
@@ -1023,20 +1022,46 @@ def get_factor_variance(self):
10231022
10241023 return variance_info
10251024
1026- def get_scores (self , data ):
1025+ def get_scores (self ,
1026+ data ,
1027+ scale_mean = None ,
1028+ scale_std = None ):
10271029 """
10281030 Get the factor scores, given the data.
10291031
10301032 Parameters
10311033 ----------
10321034 data : pd.DataFrame
10331035 The data to calculate factor scores.
1036+ scale_mean : float or None
1037+ The mean of the original
1038+ data set used to fit the
1039+ factor model. If None, attempt
1040+ to retrieve the mean from the
1041+ original `analyze()` method,
1042+ if it was saved.
1043+ Defaults to None.
1044+ scale_std : float or None
1045+ The standard deviation of the original
1046+ data set used to fit the
1047+ factor model. If None, attempt
1048+ to retrieve the standard deviation from the
1049+ original `analyze()` method,
1050+ if it was saved.
1051+ Defaults to None.
10341052
10351053 Returns
10361054 -------
10371055 scores : pd.DataFrame
10381056 The factor scores.
10391057
1058+ Raises
1059+ ------
1060+ ValueError
1061+ If either scale_std or scale_mean
1062+ is None, and the original mean or standard
1063+ deviation were not saved during fitting.
1064+
10401065 Examples
10411066 --------
10421067 >>> import pandas as pd
@@ -1055,11 +1080,38 @@ def get_scores(self, data):
10551080 if self .loadings is not None :
10561081
10571082 df = data .copy ()
1058- corr = data .corr ()
1083+ corr = self .corr .copy ()
1084+
1085+ error_msg = ('The `{}` argument is None, but no scaled {} '
1086+ 'was saved when fitting your original factor '
1087+ 'model. This most likely because you used a '
1088+ 'correlation matrix, rather than the full data '
1089+ 'set. Please either pass a value for `{}` '
1090+ 'or re-fit your model using the full data set.' )
1091+
1092+ # if no scaled mean is passed, use the mean saved by the
1093+ # original fitting procedure; if none was saved, raise an error
1094+ if scale_mean is None and self ._scale_mean is not None :
1095+ scale_mean = self ._scale_mean
1096+ elif scale_mean is None and self ._scale_mean is None :
1097+ raise ValueError (error_msg .format ('scale_mean' ,
1098+ 'mean' ,
1099+ 'scale_mean' ))
1100+
1101+ # if no scaled std is passed, use the std saved by the
1102+ # original fitting procedure; if none was saved, raise an error
1103+ if scale_std is None and self ._scale_std is not None :
1104+ scale_std = self ._scale_std
1105+ elif scale_std is None and self ._scale_std is None :
1106+ raise ValueError (error_msg .format ('scale_std' ,
1107+ 'standard deviation' ,
1108+ 'scale_std' ))
10591109
10601110 # scale the data
1061- X = (df - df . mean ( 0 )) / df . std ( 0 )
1111+ X = (df - scale_mean ) / scale_std
10621112
1113+ # use the structure matrix, if it exists;
1114+ # otherwise, just use the loadings matrix
10631115 if self .structure is not None :
10641116 structure = self .structure
10651117 else :
0 commit comments