@@ -148,7 +148,14 @@ class FactorAnalyzer:
148148 rotation_matrix : np.array
149149 The rotation matrix, if a rotation
150150 has been performed.
151-
151+ structure : np.array or None
152+ The structure loading matrix.
153+ This only exists if the rotation
154+ is promax.
155+ psi : np.array or None
156+ The factor correlations
157+ matrix. This only exists
158+ if the rotation is oblique.
152159 Notes
153160 -----
154161 This code was partly derived from the excellent R package
@@ -192,6 +199,9 @@ def __init__(self,
192199 self .loadings = None
193200 self .rotation_matrix = None
194201
202+ self ._scale_mean = None
203+ self ._scale_std = None
204+
195205 @staticmethod
196206 def _fit_uls_objective (psi , corr_mtx , n_factors ):
197207 """
@@ -510,7 +520,7 @@ def fit_factor_analysis(self,
510520 'Check to make sure you do not have any '
511521 'features with zero standard deviation.' )
512522
513- corr = corr .values
523+ corr = corr .copy (). values
514524
515525 # if `use_smc` is True, get squared multiple correlations
516526 # and use these as initial guesses for optimizer
@@ -569,7 +579,6 @@ def analyze(self,
569579 normalize = True ,
570580 impute = 'median' ,
571581 remove_non_numeric = True ,
572- use_scaling = True ,
573582 use_corr_matrix = False ,
574583 ** kwargs ):
575584 """
@@ -628,11 +637,6 @@ def analyze(self,
628637 Remove any non-numeric data. If `use_corr_matrix` is True,
629638 no non-numeric data will be removed.
630639 Defaults to True.
631- use_scaling : bool, optional
632- Whether to scale the data by subtracting out the mean
633- and dividing by the standard deviation. If `use_corr_matrix`
634- is True, scaling will not be performed.
635- Defaults to True.
636640 use_corr_matrix : bool, optional
637641 Set to true if the `data` is the correlation
638642 matrix.
@@ -694,16 +698,11 @@ def analyze(self,
694698 self .corr = df .copy ()
695699 else :
696700 self .corr = df .corr ()
697-
698- # scale the data, if it is not a correlation
699- # matrix and `use_scaling` is True
700- if use_scaling and not use_corr_matrix :
701- X = (df - df .mean (0 )) / df .std (0 )
702- else :
703- X = df .copy ()
701+ self ._scale_mean = df .mean (0 )
702+ self ._scale_std = df .std (0 )
704703
705704 # fit factor analysis model
706- loadings = self .fit_factor_analysis (X ,
705+ loadings = self .fit_factor_analysis (df . copy () ,
707706 n_factors ,
708707 use_smc ,
709708 bounds ,
@@ -805,7 +804,7 @@ def get_eigenvalues(self):
805804 """
806805 if (self .corr is not None and self .loadings is not None ):
807806
808- corr = self .corr .values
807+ corr = self .corr .copy (). values
809808
810809 e_values , _ = sp .linalg .eigh (corr )
811810 e_values = pd .DataFrame (e_values [::- 1 ],
@@ -945,20 +944,46 @@ def get_factor_variance(self):
945944
946945 return variance_info
947946
948- def get_scores (self , data ):
947+ def get_scores (self ,
948+ data ,
949+ scale_mean = None ,
950+ scale_std = None ):
949951 """
950952 Get the factor scores, given the data.
951953
952954 Parameters
953955 ----------
954956 data : pd.DataFrame
955957 The data to calculate factor scores.
958+ scale_mean : float or None
959+ The mean of the original
960+ data set used to fit the
961+ factor model. If None, attempt
962+ to retrieve the mean from the
963+ original `analyze()` method,
964+ if it was saved.
965+ Defaults to None.
966+ scale_std : float or None
967+ The standard deviation of the original
968+ data set used to fit the
969+ factor model. If None, attempt
970+ to retrieve the standard deviation from the
971+ original `analyze()` method,
972+ if it was saved.
973+ Defaults to None.
956974
957975 Returns
958976 -------
959977 scores : pd.DataFrame
960978 The factor scores.
961979
980+ Raises
981+ ------
982+ ValueError
983+ If either scale_std or scale_mean
984+ is None, and the original mean or standard
985+ deviation were not saved during fitting.
986+
962987 Examples
963988 --------
964989 >>> import pandas as pd
@@ -977,11 +1002,38 @@ def get_scores(self, data):
9771002 if self .loadings is not None :
9781003
9791004 df = data .copy ()
980- corr = data .corr ()
1005+ corr = self .corr .copy ()
1006+
1007+ error_msg = ('The `{}` argument is None, but no scaled {} '
1008+ 'was saved when fitting your original factor '
1009+ 'model. This most likely because you used a '
1010+ 'correlation matrix, rather than the full data '
1011+ 'set. Please either pass a value for `{}` '
1012+ 'or re-fit your model using the full data set.' )
1013+
1014+ # if no scaled mean is passed, fall back to the mean saved by the
1015+ # original fitting procedure; if none was saved, raise an error
1016+ if scale_mean is None and self ._scale_mean is not None :
1017+ scale_mean = self ._scale_mean
1018+ elif scale_mean is None and self ._scale_mean is None :
1019+ raise ValueError (error_msg .format ('scale_mean' ,
1020+ 'mean' ,
1021+ 'scale_mean' ))
1022+
1023+ # if no scaled std is passed, fall back to the std saved by the
1024+ # original fitting procedure; if none was saved, raise an error
1025+ if scale_std is None and self ._scale_std is not None :
1026+ scale_std = self ._scale_std
1027+ elif scale_std is None and self ._scale_std is None :
1028+ raise ValueError (error_msg .format ('scale_std' ,
1029+ 'standard deviation' ,
1030+ 'scale_std' ))
9811031
9821032 # scale the data
983- X = (df - df . mean ( 0 )) / df . std ( 0 )
1033+ X = (df - scale_mean ) / scale_std
9841034
1035+ # use the structure matrix, if it exists;
1036+ # otherwise, just use the loadings matrix
9851037 if self .structure is not None :
9861038 structure = self .structure
9871039 else :
0 commit comments