11import pickle
22import numpy as np
33import pandas as pd
4+ from numpy .linalg import norm
45from sklearn .decomposition import TruncatedSVD
56from .cmtf import perform_CP , calcR2X
67
78
def impute_missing_mat(dat: np.ndarray, tol: float = 1e-3) -> np.ndarray:
    """Fill the non-finite entries of a 2-D matrix by iterative low-rank SVD.

    Missing entries (anything for which ``np.isfinite`` is False, i.e. NaN
    and +/-inf) are first set to the mean of the finite values in their
    column.  The matrix is then repeatedly approximated with a
    ``TruncatedSVD`` of ``min(dat.shape) - 1`` components, and the missing
    entries are overwritten with the reconstruction, until the relative
    change of those entries drops to ``tol`` or below.

    Args:
        dat: 2-D array to impute.  It is never modified.
        tol: Convergence threshold on the relative change of the imputed
            entries between two successive iterations.

    Returns:
        ``dat`` itself when it has no missing entries; otherwise a copy of
        ``dat`` in which only the missing entries have been replaced.

    Raises:
        AssertionError: If an entire row or column is missing, or if the
            relative change fails to decrease between iterations.  The
            type is kept for backward compatibility with the former
            ``assert`` statements, but is raised explicitly so the checks
            are not stripped when Python runs with ``-O``.
    """
    finite = np.isfinite(dat)
    if np.all(finite):
        return dat

    if not np.all(np.any(finite, axis=0)):
        raise AssertionError("Cannot impute if an entire column is empty")
    if not np.all(np.any(finite, axis=1)):
        raise AssertionError("Cannot impute if an entire row is empty")

    miss_idx = np.where(~finite)

    # Mask +/-inf as well as NaN before averaging: np.nanmean only skips
    # NaN, so an inf entry would otherwise make the starting guess (and
    # therefore the SVD input) non-finite.
    col_mean = np.nanmean(np.where(finite, dat, np.nan), axis=0)
    imp = np.copy(dat)
    imp[miss_idx] = col_mean[miss_idx[1]]

    # Loop-invariant; the row/column checks above guarantee both
    # dimensions are at least 2 here, so this is at least 1.
    n_components = min(dat.shape) - 1

    # Start from infinity so that the first iteration is never rejected by
    # the monotonicity check merely because the initial change is >= 100%.
    diff = np.inf
    while diff > tol:
        tsvd = TruncatedSVD(n_components=n_components)
        scores = tsvd.fit_transform(imp)
        recon = scores @ tsvd.components_

        recon_miss = recon[miss_idx]
        change = norm(imp[miss_idx] - recon_miss)
        scale = norm(recon_miss)
        if scale > 0:
            new_diff = change / scale
        else:
            # Avoid 0/0: an all-zero reconstruction that already matches
            # the current fill counts as converged.
            new_diff = 0.0 if change == 0 else np.inf

        if not new_diff < diff:
            raise AssertionError("Matrix imputation difference is not decreasing")
        diff = new_diff
        imp[miss_idx] = recon_miss
    return imp
31+
32+
833class Decomposition ():
934 def __init__ (self , data , max_rr = 6 ):
1035 self .data = data
@@ -20,6 +45,8 @@ def perform_tfac(self):
2045 def perform_PCA (self , flattenon = 0 ):
2146 dataShape = self .data .shape
2247 flatData = np .reshape (np .moveaxis (self .data , flattenon , 0 ), (dataShape [flattenon ], - 1 ))
48+ if not np .all (np .isfinite (flatData )):
49+ flatData = impute_missing_mat (flatData )
2350
2451 tsvd = TruncatedSVD (n_components = max (self .rrs ))
2552 scores = tsvd .fit_transform (flatData )
0 commit comments